/*************************************************
*               pcre2grep program                *
*************************************************/
4
5/* This is a grep program that uses the 8-bit PCRE regular expression library
6via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7and native z/OS systems it can recurse into directories, and in z/OS it can
8handle PDS files.
9
10Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11additional header is required. That header is not included in the main PCRE2
12distribution because other apparatus is needed to compile pcre2grep for z/OS.
13The header can be found in the special z/OS distribution, which is available
14from www.zaconsultants.net or from www.cbttape.org.
15
16 Copyright (c) 1997-2020 University of Cambridge
17
18-----------------------------------------------------------------------------
19Redistribution and use in source and binary forms, with or without
20modification, are permitted provided that the following conditions are met:
21
22 * Redistributions of source code must retain the above copyright notice,
23 this list of conditions and the following disclaimer.
24
25 * Redistributions in binary form must reproduce the above copyright
26 notice, this list of conditions and the following disclaimer in the
27 documentation and/or other materials provided with the distribution.
28
29 * Neither the name of the University of Cambridge nor the names of its
30 contributors may be used to endorse or promote products derived from
31 this software without specific prior written permission.
32
33THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43POSSIBILITY OF SUCH DAMAGE.
44-----------------------------------------------------------------------------
45*/
46
47#ifdef HAVE_CONFIG_H
48#include "config.h"
49#endif
50
51#include <ctype.h>
52#include <locale.h>
53#include <stdio.h>
54#include <string.h>
55#include <stdlib.h>
56#include <errno.h>
57
58#include <sys/types.h>
59#include <sys/stat.h>
60
61#if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62 && !defined WIN32 && !defined(__CYGWIN__)
63#define WIN32
64#endif
65
66/* Some CMake's define it still */
67#if defined(__CYGWIN__) && defined(WIN32)
68#undef WIN32
69#endif
70
71#ifdef __VMS
72#include clidef
73#include descrip
74#include lib$routines
75#endif
76
77#ifdef WIN32
78#include <io.h> /* For _setmode() */
79#include <fcntl.h> /* For _O_BINARY */
80#endif
81
82#if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83#ifdef WIN32
84#include <process.h>
85#else
86#include <sys/wait.h>
87#endif
88#endif
89
90#ifdef HAVE_UNISTD_H
91#include <unistd.h>
92#endif
93
94#ifdef SUPPORT_LIBZ
95#include <zlib.h>
96#endif
97
98#ifdef SUPPORT_LIBBZ2
99#include <bzlib.h>
100#endif
101
102#define PCRE2_CODE_UNIT_WIDTH 8
103#include "pcre2.h"
104
105/* Older versions of MSVC lack snprintf(). This define allows for
106warning/error-free compilation and testing with MSVC compilers back to at least
107MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108
109#if defined(_MSC_VER) && (_MSC_VER < 1900)
110#define snprintf _snprintf
111#endif
112
113/* VC and older compilers don't support %td or %zu, and even some that claim to
114be C99 don't support it (hence DISABLE_PERCENT_ZT). */
115
116#if defined(_MSC_VER) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L || defined(DISABLE_PERCENT_ZT)
117#define PTR_FORM "lu"
118#define SIZ_FORM "lu"
119#define SIZ_CAST (unsigned long int)
120#else
121#define PTR_FORM "td"
122#define SIZ_FORM "zu"
123#define SIZ_CAST
124#endif
125
126#define FALSE 0
127#define TRUE 1
128
129typedef int BOOL;
130
131#define DEFAULT_CAPTURE_MAX 50
132
133#if BUFSIZ > 8192
134#define MAXPATLEN BUFSIZ
135#else
136#define MAXPATLEN 8192
137#endif
138
139#define FNBUFSIZ 2048
140#define ERRBUFSIZ 256
141
142/* Values for the "filenames" variable, which specifies options for file name
143output. The order is important; it is assumed that a file name is wanted for
144all values greater than FN_DEFAULT. */
145
146enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
147
148/* File reading styles */
149
150enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
151
152/* Actions for the -d and -D options */
153
154enum { dee_READ, dee_SKIP, dee_RECURSE };
155enum { DEE_READ, DEE_SKIP };
156
157/* Actions for special processing options (flag bits) */
158
159#define PO_WORD_MATCH 0x0001
160#define PO_LINE_MATCH 0x0002
161#define PO_FIXED_STRINGS 0x0004
162
163/* Binary file options */
164
165enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
166
167/* Return values from decode_dollar_escape() */
168
169enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
170
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { } while (0) so that the macro expands to exactly
one statement. A bare "if (...) {}" would swallow a following "else" when the
macro is used as the body of an if/else. The macro must be followed by a
semicolon, like an ordinary function call. */

#define FWRITE_IGNORE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
178
179/* Under Windows, we have to set stdout to be binary, so that it does not
180convert \r\n at the ends of output lines to \r\r\n. However, that means that
181any messages written to stdout must have \r\n as their line terminator. This is
182handled by using STDOUT_NL as the newline string. We also use a normal double
183quote for the example, as single quotes aren't usually available. */
184
185#ifdef WIN32
186#define STDOUT_NL "\r\n"
187#define STDOUT_NL_LEN 2
188#define QUOT "\""
189#else
190#define STDOUT_NL "\n"
191#define STDOUT_NL_LEN 1
192#define QUOT "'"
193#endif
194
195/* This code is returned from decode_dollar_escape() when $n is encountered,
196and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
197point. */
198
199#define STDOUT_NL_CODE 0x7fffffffu
200
201
202
203/*************************************************
204* Global variables *
205*************************************************/
206
207/* Jeffrey Friedl has some debugging requirements that are not part of the
208regular code. */
209
210#ifdef JFRIEDL_DEBUG
211static int S_arg = -1;
212static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
213static unsigned int jfriedl_XT = 0; /* replicate text this many times */
214static const char *jfriedl_prefix = "";
215static const char *jfriedl_postfix = "";
216#endif
217
218static const char *colour_string = "1;31";
219static const char *colour_option = NULL;
220static const char *dee_option = NULL;
221static const char *DEE_option = NULL;
222static const char *locale = NULL;
223static const char *newline_arg = NULL;
224static const char *om_separator = NULL;
225static const char *stdin_name = "(standard input)";
226static const char *output_text = NULL;
227
228static char *main_buffer = NULL;
229
230static int after_context = 0;
231static int before_context = 0;
232static int binary_files = BIN_BINARY;
233static int both_context = 0;
234static int bufthird = PCRE2GREP_BUFSIZE;
235static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
236static int bufsize = 3*PCRE2GREP_BUFSIZE;
237static int endlinetype;
238
239static int count_limit = -1; /* Not long, so that it works with OP_NUMBER */
240static unsigned long int counts_printed = 0;
241static unsigned long int total_count = 0;
242
243#ifdef WIN32
244static int dee_action = dee_SKIP;
245#else
246static int dee_action = dee_READ;
247#endif
248
249static int DEE_action = DEE_READ;
250static int error_count = 0;
251static int filenames = FN_DEFAULT;
252
253#ifdef SUPPORT_PCRE2GREP_JIT
254static BOOL use_jit = TRUE;
255#else
256static BOOL use_jit = FALSE;
257#endif
258
259static const uint8_t *character_tables = NULL;
260
261static uint32_t pcre2_options = 0;
262static uint32_t extra_options = 0;
263static PCRE2_SIZE heap_limit = PCRE2_UNSET;
264static uint32_t match_limit = 0;
265static uint32_t depth_limit = 0;
266
267static pcre2_compile_context *compile_context;
268static pcre2_match_context *match_context;
269static pcre2_match_data *match_data;
270static PCRE2_SIZE *offsets;
271static uint32_t offset_size;
272static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
273
274static BOOL count_only = FALSE;
275static BOOL do_colour = FALSE;
276#ifdef WIN32
277static BOOL do_ansi = FALSE;
278#endif
279static BOOL file_offsets = FALSE;
280static BOOL hyphenpending = FALSE;
281static BOOL invert = FALSE;
282static BOOL line_buffered = FALSE;
283static BOOL line_offsets = FALSE;
284static BOOL multiline = FALSE;
285static BOOL number = FALSE;
286static BOOL omit_zero_count = FALSE;
287static BOOL resource_error = FALSE;
288static BOOL quiet = FALSE;
289static BOOL show_total_count = FALSE;
290static BOOL silent = FALSE;
291static BOOL utf = FALSE;
292
293static uint8_t utf8_buffer[8];
294
295
/* One link in the chain of capturing group numbers collected for the
--only-matching option. */

typedef struct omstr
{
  struct omstr *next;  /* following link in the chain */
  int groupnum;        /* capturing group number held by this link */
} omstr;
302
303static omstr *only_matching = NULL;
304static omstr *only_matching_last = NULL;
305static int only_matching_count;
306
307/* Structure for holding the two variables that describe a number chain. */
308
309typedef struct omdatastr {
310 omstr **anchor;
311 omstr **lastptr;
312} omdatastr;
313
314static omdatastr only_matching_data = { &only_matching, &only_matching_last };
315
/* One link in a chain of file names, as used for -f, --file-list and
--{in,ex}clude-from. */

typedef struct fnstr
{
  struct fnstr *next;  /* following link in the chain */
  char *name;          /* the file name; free_file_chain() does not free it */
} fnstr;
322
323static fnstr *exclude_from = NULL;
324static fnstr *exclude_from_last = NULL;
325static fnstr *include_from = NULL;
326static fnstr *include_from_last = NULL;
327
328static fnstr *file_lists = NULL;
329static fnstr *file_lists_last = NULL;
330static fnstr *pattern_files = NULL;
331static fnstr *pattern_files_last = NULL;
332
333/* Structure for holding the two variables that describe a file name chain. */
334
335typedef struct fndatastr {
336 fnstr **anchor;
337 fnstr **lastptr;
338} fndatastr;
339
340static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
341static fndatastr include_from_data = { &include_from, &include_from_last };
342static fndatastr file_lists_data = { &file_lists, &file_lists_last };
343static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
344
345/* Structure for pattern and its compiled form; used for matching patterns and
346also for include/exclude patterns. */
347
348typedef struct patstr {
349 struct patstr *next;
350 char *string;
351 PCRE2_SIZE length;
352 pcre2_code *compiled;
353} patstr;
354
355static patstr *patterns = NULL;
356static patstr *patterns_last = NULL;
357static patstr *include_patterns = NULL;
358static patstr *include_patterns_last = NULL;
359static patstr *exclude_patterns = NULL;
360static patstr *exclude_patterns_last = NULL;
361static patstr *include_dir_patterns = NULL;
362static patstr *include_dir_patterns_last = NULL;
363static patstr *exclude_dir_patterns = NULL;
364static patstr *exclude_dir_patterns_last = NULL;
365
366/* Structure holding the two variables that describe a pattern chain. A pointer
367to such structures is used for each appropriate option. */
368
369typedef struct patdatastr {
370 patstr **anchor;
371 patstr **lastptr;
372} patdatastr;
373
374static patdatastr match_patdata = { &patterns, &patterns_last };
375static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
376static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
377static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
378static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
379
380static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
381 &include_dir_patterns, &exclude_dir_patterns };
382
383static const char *incexname[4] = { "--include", "--exclude",
384 "--include-dir", "--exclude-dir" };
385
386/* Structure for options and list of them */
387
388enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
389 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
390
/* Description of one command line option. The option table is an array of
these; the field order matters because the table uses positional initializers.
*/

typedef struct option_item
{
  int type;               /* one of the OP_xxx values */
  int one_char;           /* single-letter form, or a negative N_xxx code */
  void *dataptr;          /* where the option's value is kept, or NULL */
  const char *long_name;  /* long name, possibly followed by "=value" text */
  const char *help_text;  /* one-line description of the option */
} option_item;
398
399/* Options without a single-letter equivalent get a negative value. This can be
400used to identify them. */
401
402#define N_COLOUR (-1)
403#define N_EXCLUDE (-2)
404#define N_EXCLUDE_DIR (-3)
405#define N_HELP (-4)
406#define N_INCLUDE (-5)
407#define N_INCLUDE_DIR (-6)
408#define N_LABEL (-7)
409#define N_LOCALE (-8)
410#define N_NULL (-9)
411#define N_LOFFSETS (-10)
412#define N_FOFFSETS (-11)
413#define N_LBUFFER (-12)
414#define N_H_LIMIT (-13)
415#define N_M_LIMIT (-14)
416#define N_M_LIMIT_DEP (-15)
417#define N_BUFSIZE (-16)
418#define N_NOJIT (-17)
419#define N_FILE_LIST (-18)
420#define N_BINARY_FILES (-19)
421#define N_EXCLUDE_FROM (-20)
422#define N_INCLUDE_FROM (-21)
423#define N_OM_SEPARATOR (-22)
424#define N_MAX_BUFSIZE (-23)
425#define N_OM_CAPTURE (-24)
426#define N_ALLABSK (-25)
427
428static option_item optionlist[] = {
429 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
430 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
431 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
432 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
433 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
434 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
435 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
436 { OP_NUMBER, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
437 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
438 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
439 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
440 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
441 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
442 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
443 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
444 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
445 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
446 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
447 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
448 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
449 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
450 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
451 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
452 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
453 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
454 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
455 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
456 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
457 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
458 { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
459 { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
460 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
461 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
462 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
463 { OP_NUMBER, 'm', &count_limit, "max-count=number", "stop after <number> matched lines" },
464 { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
465 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
466#ifdef SUPPORT_PCRE2GREP_JIT
467 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
468#else
469 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
470#endif
471 { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
472 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
473 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
474 { OP_U32NUMBER, N_OM_CAPTURE, &capture_max, "om-capture=n", "set capture count for --only-matching" },
475 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
476 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
477 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
478 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
479 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
480 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
481 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
482 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
483#ifdef JFRIEDL_DEBUG
484 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
485#endif
486 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
487 { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" },
488 { OP_NODATA, 'u', NULL, "utf", "use UTF mode" },
489 { OP_NODATA, 'U', NULL, "utf-allow-invalid", "use UTF mode, allow for invalid code units" },
490 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
491 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
492 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
493 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
494 { OP_NODATA, N_ALLABSK, NULL, "allow-lookaround-bsk", "allow \\K in lookarounds" },
495 { OP_NODATA, 0, NULL, NULL, NULL }
496};
497
498/* Table of names for newline types. Must be kept in step with the definitions
499of PCRE2_NEWLINE_xx in pcre2.h. */
500
501static const char *newlines[] = {
502 "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
503
/* UTF-8 tables */

/* Largest value that can be encoded in 1, 2, ... 6 bytes. */

const int utf8_table1[] = {
  0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff };

/* Number of entries in utf8_table1. */

const int utf8_table1_size = sizeof(utf8_table1) / sizeof(utf8_table1[0]);

/* Marker bits that ord2utf8() ORs into the first byte of a sequence, indexed
by the number of bytes that follow it. */

const int utf8_table2[] = {
  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };

/* NOTE(review): looks like the mask for the data bits of a first byte, with
the same indexing as utf8_table2 - confirm against the code that uses it. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01 };

/* NOTE(review): looks like a per-first-byte count of additional bytes,
indexed by the low 6 bits of the first byte - confirm against the code that
uses it. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
518
519
520#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
/*************************************************
*    Emulated memmove() for systems without it   *
*************************************************/

/* Copies n bytes from s to d, allowing the two areas to overlap. bcopy() is
used when HAVE_BCOPY is defined. Otherwise the bytes are copied one at a time,
because some non-Unix environments have neither memmove() nor bcopy().

Arguments:
  d         destination
  s         source
  n         number of bytes to copy

Returns:    d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *out = (unsigned char *)d;
const unsigned char *in = (const unsigned char *)s;
size_t k;

if (out > in)
  {
  /* Destination is above the source: copy from the top downwards so that
  bytes still to be read are not overwritten. */
  for (k = n; k > 0; k--) out[k - 1] = in[k - 1];
  }
else
  {
  for (k = 0; k < n; k++) out[k] = in[k];
  }

return d;
#endif /* not HAVE_BCOPY */
}
553#undef memmove
554#define memmove(d,s,n) emulated_memmove(d,s,n)
555#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
556
557
558
559/*************************************************
560* Convert code point to UTF-8 *
561*************************************************/
562
563/* A static buffer is used. Returns the number of bytes. */
564
565static int
566ord2utf8(uint32_t value)
567{
568int i, j;
569uint8_t *utf8bytes = utf8_buffer;
570for (i = 0; i < utf8_table1_size; i++)
571 if (value <= (uint32_t)utf8_table1[i]) break;
572utf8bytes += i;
573for (j = i; j > 0; j--)
574 {
575 *utf8bytes-- = 0x80 | (value & 0x3f);
576 value >>= 6;
577 }
578*utf8bytes = utf8_table2[i] | value;
579return i + 1;
580}
581
582
583
/*************************************************
*         Case-independent string compare        *
*************************************************/

/* Compares two zero-terminated strings after passing each character through
tolower(). The characters are read as unsigned char because the <ctype.h>
functions are only defined for EOF and values representable as unsigned char;
a plain char with the top bit set may be negative.

Arguments:
  str1      first string
  str2      second string

Returns:    0 if the strings are equal when case is ignored
            -1 if the first differing character of str1 is the smaller
            1 if the first differing character of str1 is the greater
*/

static int
strcmpic(const char *str1, const char *str2)
{
const unsigned char *a = (const unsigned char *)str1;
const unsigned char *b = (const unsigned char *)str2;

while (*a != '\0' || *b != '\0')
  {
  unsigned int c1 = (unsigned int)tolower(*a++);
  unsigned int c2 = (unsigned int)tolower(*b++);
  /* If only one string has ended, its zero compares unequal here, so the
  loop never reads beyond a terminator. */
  if (c1 != c2) return (c1 > c2)? 1 : -1;
  }
return 0;
}
600
601
/*************************************************
*              Parse GREP_COLORS                 *
*************************************************/

/* Extract the value of ms or mt from a GREP_COLORS string. An "ms=" setting
anywhere in the string is preferred; "mt=" is used only when there is no
"ms=". The value runs up to the next colon or the end of the string, and is
cut short if it does not fit in the 16-byte static buffer in which it is
returned. That buffer is overwritten by each call.

Argument:   the string, possibly NULL
Returns:    the value of ms or mt, or NULL if neither present
*/

static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
const char *src;
size_t used = 0;

if (gc == NULL) return NULL;

src = strstr(gc, "ms=");
if (src == NULL)
  {
  src = strstr(gc, "mt=");
  if (src == NULL) return NULL;
  }

/* Step over the three-character key, then copy the value. */

for (src += 3; *src != ':' && *src != 0 && used < sizeof(seq) - 1; src++)
  seq[used++] = *src;

seq[used] = 0;
return seq;
}
629
630
631/*************************************************
632* Exit from the program *
633*************************************************/
634
635/* If there has been a resource error, give a suitable message.
636
637Argument: the return code
638Returns: does not return
639*/
640
641static void
642pcre2grep_exit(int rc)
643{
644/* VMS does exit codes differently: both exit(1) and exit(0) return with a
645status of 1, which is not helpful. To help with this problem, define a symbol
646(akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
647therein. */
648
649#ifdef __VMS
650 char val_buf[4];
651 $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
652 $DESCRIPTOR(sym_val, val_buf);
653 sprintf(val_buf, "%d", rc);
654 sym_val.dsc$w_length = strlen(val_buf);
655 lib$set_symbol(&sym_nam, &sym_val);
656#endif
657
658if (resource_error)
659 {
660 fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
661 "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
662 PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
663 fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
664 }
665exit(rc);
666}
667
668
669/*************************************************
670* Add item to chain of patterns *
671*************************************************/
672
673/* Used to add an item onto a chain, or just return an unconnected item if the
674"after" argument is NULL.
675
676Arguments:
677 s pattern string to add
678 patlen length of pattern
679 after if not NULL points to item to insert after
680
681Returns: new pattern block or NULL on error
682*/
683
684static patstr *
685add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
686{
687patstr *p = (patstr *)malloc(sizeof(patstr));
688if (p == NULL)
689 {
690 fprintf(stderr, "pcre2grep: malloc failed\n");
691 pcre2grep_exit(2);
692 }
693if (patlen > MAXPATLEN)
694 {
695 fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
696 MAXPATLEN);
697 free(p);
698 return NULL;
699 }
700p->next = NULL;
701p->string = s;
702p->length = patlen;
703p->compiled = NULL;
704
705if (after != NULL)
706 {
707 p->next = after->next;
708 after->next = p;
709 }
710return p;
711}
712
713
714/*************************************************
715* Free chain of patterns *
716*************************************************/
717
718/* Used for several chains of patterns.
719
720Argument: pointer to start of chain
721Returns: nothing
722*/
723
724static void
725free_pattern_chain(patstr *pc)
726{
727while (pc != NULL)
728 {
729 patstr *p = pc;
730 pc = p->next;
731 if (p->compiled != NULL) pcre2_code_free(p->compiled);
732 free(p);
733 }
734}
735
736
737/*************************************************
738* Free chain of file names *
739*************************************************/
740
741/*
742Argument: pointer to start of chain
743Returns: nothing
744*/
745
746static void
747free_file_chain(fnstr *fn)
748{
749while (fn != NULL)
750 {
751 fnstr *f = fn;
752 fn = f->next;
753 free(f);
754 }
755}
756
757
758/*************************************************
759* OS-specific functions *
760*************************************************/
761
762/* These definitions are needed in all Windows environments, even those where
763Unix-style directory scanning can be used (see below). */
764
765#ifdef WIN32
766
767#ifndef STRICT
768# define STRICT
769#endif
770#ifndef WIN32_LEAN_AND_MEAN
771# define WIN32_LEAN_AND_MEAN
772#endif
773
774#include <windows.h>
775
776#define iswild(name) (strpbrk(name, "*?") != NULL)
777
/* Convert ANSI BGR format to RGB used by Windows: bits 0 and 2 of the 3-bit
colour number change places and bit 1 stays where it is. The argument is fully
parenthesized so that any expression can be passed; note that it is evaluated
three times, so it must not have side effects. */
#define BGR_RGB(x) ((((x) & 1) ? 4 : 0) | ((x) & 2) | (((x) & 4) ? 1 : 0))
780
781static HANDLE hstdout;
782static CONSOLE_SCREEN_BUFFER_INFO csbi;
783static WORD match_colour;
784
785static WORD
786decode_ANSI_colour(const char *cs)
787{
788WORD result = csbi.wAttributes;
789while (*cs)
790 {
791 if (isdigit(*cs))
792 {
793 int code = atoi(cs);
794 if (code == 1) result |= 0x08;
795 else if (code == 4) result |= 0x8000;
796 else if (code == 5) result |= 0x80;
797 else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
798 else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
799 else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
800 else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
801 /* aixterm high intensity colour codes */
802 else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
803 else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
804
805 while (isdigit(*cs)) cs++;
806 }
807 if (*cs) cs++;
808 }
809return result;
810}
811
812
813static void
814init_colour_output()
815{
816if (do_colour)
817 {
818 hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
819 /* This fails when redirected to con; try again if so. */
820 if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
821 {
822 HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
823 FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
824 GetConsoleScreenBufferInfo(hcon, &csbi);
825 CloseHandle(hcon);
826 }
827 match_colour = decode_ANSI_colour(colour_string);
828 /* No valid colour found - turn off colouring */
829 if (!match_colour) do_colour = FALSE;
830 }
831}
832
833#endif /* WIN32 */
834
835
836/* The following sets of functions are defined so that they can be made system
837specific. At present there are versions for Unix-style environments, Windows,
838native z/OS, and "no support". */
839
840
841/************* Directory scanning Unix-style and z/OS ***********/
842
843#if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
844#include <sys/types.h>
845#include <sys/stat.h>
846#include <dirent.h>
847
848#if defined NATIVE_ZOS
849/************* Directory and PDS/E scanning for z/OS ***********/
850/************* z/OS looks mostly like Unix with USS ************/
851/* However, z/OS needs the #include statements in this header */
852#include "pcrzosfs.h"
853/* That header is not included in the main PCRE distribution because
854 other apparatus is needed to compile pcre2grep for z/OS. The header
855 can be found in the special z/OS distribution, which is available
856 from www.zaconsultants.net or from www.cbttape.org. */
857#endif
858
859typedef DIR directory_type;
860#define FILESEP '/'
861
/* Test whether a name refers to a directory. If stat() fails the answer is
"no", in the expectation that a later attempt to open the name as a file will
fail and be reported then.

Argument:   the file name
Returns:    non-zero if the name is a directory, otherwise zero
*/

static int
isdirectory(char *filename)
{
struct stat info;
return (stat(filename, &info) >= 0)? S_ISDIR(info.st_mode) : 0;
}
870
871static directory_type *
872opendirectory(char *filename)
873{
874return opendir(filename);
875}
876
877static char *
878readdirectory(directory_type *dir)
879{
880for (;;)
881 {
882 struct dirent *dent = readdir(dir);
883 if (dent == NULL) return NULL;
884 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
885 return dent->d_name;
886 }
887/* Control never reaches here */
888}
889
890static void
891closedirectory(directory_type *dir)
892{
893closedir(dir);
894}
895
896
/************* Test for regular file, Unix-style **********/

/* If stat() fails the answer is "yes", in the expectation that a later attempt
to open the name as a file will fail and be reported then.

Argument:   the file name
Returns:    non-zero if the name is a regular file or cannot be examined,
            otherwise zero
*/

static int
isregfile(char *filename)
{
struct stat info;
int rc = stat(filename, &info);
if (rc >= 0) return S_ISREG(info.st_mode);
return 1;
}
907
908
909#if defined NATIVE_ZOS
910/************* Test for a terminal in z/OS **********/
911/* isatty() does not work in a TSO environment, so always give FALSE.*/
912
913static BOOL
914is_stdout_tty(void)
915{
916return FALSE;
917}
918
919static BOOL
920is_file_tty(FILE *f)
921{
922return FALSE;
923}
924
925
926/************* Test for a terminal, Unix-style **********/
927
928#else
929static BOOL
930is_stdout_tty(void)
931{
932return isatty(fileno(stdout));
933}
934
935static BOOL
936is_file_tty(FILE *f)
937{
938return isatty(fileno(f));
939}
940#endif
941
942
943/************* Print optionally coloured match Unix-style and z/OS **********/
944
945static void
946print_match(const void *buf, int length)
947{
948if (length == 0) return;
949if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
950FWRITE_IGNORE(buf, 1, length, stdout);
951if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
952}
953
954/* End of Unix-style or native z/OS environment functions. */
955
956
957/************* Directory scanning in Windows ***********/
958
959/* I (Philip Hazel) have no means of testing this code. It was contributed by
960Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
961when it did not exist. David Byron added a patch that moved the #include of
962<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
963*/
964
965#elif defined WIN32
966
967#ifndef INVALID_FILE_ATTRIBUTES
968#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
969#endif
970
/* State for one directory scan in the Windows code below. */

typedef struct directory_type
{
HANDLE handle;         /* Search handle returned by FindFirstFile() */
BOOL first;            /* TRUE until the entry found by FindFirstFile() has
                          been handed out by readdirectory() */
WIN32_FIND_DATA data;  /* Entry most recently filled in by FindFirstFile() or
                          FindNextFile() */
} directory_type;
977
978#define FILESEP '/'
979
980int
981isdirectory(char *filename)
982{
983DWORD attr = GetFileAttributes(filename);
984if (attr == INVALID_FILE_ATTRIBUTES)
985 return 0;
986return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
987}
988
989directory_type *
990opendirectory(char *filename)
991{
992size_t len;
993char *pattern;
994directory_type *dir;
995DWORD err;
996len = strlen(filename);
997pattern = (char *)malloc(len + 3);
998dir = (directory_type *)malloc(sizeof(*dir));
999if ((pattern == NULL) || (dir == NULL))
1000 {
1001 fprintf(stderr, "pcre2grep: malloc failed\n");
1002 pcre2grep_exit(2);
1003 }
1004memcpy(pattern, filename, len);
1005if (iswild(filename))
1006 pattern[len] = 0;
1007else
1008 memcpy(&(pattern[len]), "\\*", 3);
1009dir->handle = FindFirstFile(pattern, &(dir->data));
1010if (dir->handle != INVALID_HANDLE_VALUE)
1011 {
1012 free(pattern);
1013 dir->first = TRUE;
1014 return dir;
1015 }
1016err = GetLastError();
1017free(pattern);
1018free(dir);
1019errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
1020return NULL;
1021}
1022
1023char *
1024readdirectory(directory_type *dir)
1025{
1026for (;;)
1027 {
1028 if (!dir->first)
1029 {
1030 if (!FindNextFile(dir->handle, &(dir->data)))
1031 return NULL;
1032 }
1033 else
1034 {
1035 dir->first = FALSE;
1036 }
1037 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
1038 return dir->data.cFileName;
1039 }
1040#ifndef _MSC_VER
1041return NULL; /* Keep compiler happy; never executed */
1042#endif
1043}
1044
1045void
1046closedirectory(directory_type *dir)
1047{
1048FindClose(dir->handle);
1049free(dir);
1050}
1051
1052
1053/************* Test for regular file in Windows **********/
1054
1055/* I don't know how to do this, or if it can be done; assume all paths are
1056regular if they are not directories. */
1057
int
isregfile(char *filename)
{
/* Anything that isdirectory() does not accept is treated as regular. */
return isdirectory(filename)? 0 : 1;
}
1062
1063
1064/************* Test for a terminal in Windows **********/
1065
1066static BOOL
1067is_stdout_tty(void)
1068{
1069return _isatty(_fileno(stdout));
1070}
1071
1072static BOOL
1073is_file_tty(FILE *f)
1074{
1075return _isatty(_fileno(f));
1076}
1077
1078
1079/************* Print optionally coloured match in Windows **********/
1080
1081static void
1082print_match(const void *buf, int length)
1083{
1084if (length == 0) return;
1085if (do_colour)
1086 {
1087 if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1088 else SetConsoleTextAttribute(hstdout, match_colour);
1089 }
1090FWRITE_IGNORE(buf, 1, length, stdout);
1091if (do_colour)
1092 {
1093 if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1094 else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1095 }
1096}
1097
1098/* End of Windows functions */
1099
1100
1101/************* Directory scanning when we can't do it ***********/
1102
1103/* The type is void, and apart from isdirectory(), the functions do nothing. */
1104
1105#else
1106
1107#define FILESEP 0
1108typedef void directory_type;
1109
/* No directory support: nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}
1111directory_type * opendirectory(char *filename) { return (directory_type*)0;}
1112char *readdirectory(directory_type *dir) { return (char*)0;}
1113void closedirectory(directory_type *dir) {}
1114
1115
1116/************* Test for regular file when we can't do it **********/
1117
1118/* Assume all files are regular. */
1119
int
isregfile(char *filename)
{
(void)filename;   /* Every path is taken to be a regular file */
return 1;
}
1121
1122
1123/************* Test for a terminal when we can't do it **********/
1124
/* Fallback variant: stdout is never reported as a terminal; the answer is
always FALSE. */

static BOOL
is_stdout_tty(void)
{
return FALSE;
}
1130
1131static BOOL
1132is_file_tty(FILE *f)
1133{
1134return FALSE;
1135}
1136
1137
1138/************* Print optionally coloured match when we can't do it **********/
1139
1140static void
1141print_match(const void *buf, int length)
1142{
1143if (length == 0) return;
1144FWRITE_IGNORE(buf, 1, length, stdout);
1145}
1146
1147#endif /* End of system-specific functions */
1148
1149
1150
1151#ifndef HAVE_STRERROR
1152/*************************************************
1153* Provide strerror() for non-ANSI libraries *
1154*************************************************/
1155
1156/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1157in their libraries, but can provide the same facility by this simple
1158alternative function. */
1159
1160extern int sys_nerr;
1161extern char *sys_errlist[];
1162
1163char *
1164strerror(int n)
1165{
1166if (n < 0 || n >= sys_nerr) return "unknown error number";
1167return sys_errlist[n];
1168}
1169#endif /* HAVE_STRERROR */
1170
1171
1172
1173/*************************************************
1174* Usage function *
1175*************************************************/
1176
1177static int
1178usage(int rc)
1179{
1180option_item *op;
1181fprintf(stderr, "Usage: pcre2grep [-");
1182for (op = optionlist; op->one_char != 0; op++)
1183 {
1184 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1185 }
1186fprintf(stderr, "] [long options] [pattern] [files]\n");
1187fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1188 "options.\n");
1189return rc;
1190}
1191
1192
1193
1194/*************************************************
1195* Help function *
1196*************************************************/
1197
1198static void
1199help(void)
1200{
1201option_item *op;
1202
1203printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1204printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1205printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1206
1207#ifdef SUPPORT_PCRE2GREP_CALLOUT
1208#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1209printf("All callout scripts in patterns are supported." STDOUT_NL);
1210#else
1211printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1212#endif
1213#else
1214printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1215#endif
1216
1217printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1218
1219#ifdef SUPPORT_LIBZ
1220printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1221#endif
1222
1223#ifdef SUPPORT_LIBBZ2
1224printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1225#endif
1226
1227#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1228printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1229#else
1230printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1231#endif
1232
1233printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1234printf("Options:" STDOUT_NL);
1235
1236for (op = optionlist; op->one_char != 0; op++)
1237 {
1238 int n;
1239 char s[4];
1240
1241 if (op->one_char > 0 && (op->long_name)[0] == 0)
1242 n = 31 - printf(" -%c", op->one_char);
1243 else
1244 {
1245 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1246 else strcpy(s, " ");
1247 n = 31 - printf(" %s --%s", s, op->long_name);
1248 }
1249
1250 if (n < 1) n = 1;
1251 printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
1252 }
1253
1254printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1255printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1256printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1257printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1258printf("space is removed and blank lines are ignored." STDOUT_NL);
1259printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1260
1261printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1262printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1263}
1264
1265
1266
1267/*************************************************
1268* Test exclude/includes *
1269*************************************************/
1270
1271/* If any exclude pattern matches, the path is excluded. Otherwise, unless
1272there are no includes, the path must match an include pattern.
1273
1274Arguments:
1275 path the path to be matched
1276 ip the chain of include patterns
1277 ep the chain of exclude patterns
1278
1279Returns: TRUE if the path is not excluded
1280*/
1281
1282static BOOL
1283test_incexc(char *path, patstr *ip, patstr *ep)
1284{
1285int plen = strlen((const char *)path);
1286
1287for (; ep != NULL; ep = ep->next)
1288 {
1289 if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1290 return FALSE;
1291 }
1292
1293if (ip == NULL) return TRUE;
1294
1295for (; ip != NULL; ip = ip->next)
1296 {
1297 if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1298 return TRUE;
1299 }
1300
1301return FALSE;
1302}
1303
1304
1305
1306/*************************************************
1307* Decode integer argument value *
1308*************************************************/
1309
1310/* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1311because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1312just keep it simple.
1313
1314Arguments:
1315 option_data the option data string
1316 op the option item (for error messages)
1317 longop TRUE if option given in long form
1318
1319Returns: a long integer
1320*/
1321
1322static long int
1323decode_number(char *option_data, option_item *op, BOOL longop)
1324{
1325unsigned long int n = 0;
1326char *endptr = option_data;
1327while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1328while (isdigit((unsigned char)(*endptr)))
1329 n = n * 10 + (int)(*endptr++ - '0');
1330if (toupper(*endptr) == 'K')
1331 {
1332 n *= 1024;
1333 endptr++;
1334 }
1335else if (toupper(*endptr) == 'M')
1336 {
1337 n *= 1024*1024;
1338 endptr++;
1339 }
1340
1341if (*endptr != 0) /* Error */
1342 {
1343 if (longop)
1344 {
1345 char *equals = strchr(op->long_name, '=');
1346 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1347 (int)(equals - op->long_name);
1348 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1349 option_data, nlen, op->long_name);
1350 }
1351 else
1352 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1353 option_data, op->one_char);
1354 pcre2grep_exit(usage(2));
1355 }
1356
1357return n;
1358}
1359
1360
1361
1362/*************************************************
1363* Add item to a chain of numbers *
1364*************************************************/
1365
1366/* Used to add an item onto a chain, or just return an unconnected item if the
1367"after" argument is NULL.
1368
1369Arguments:
1370 n the number to add
1371 after if not NULL points to item to insert after
1372
1373Returns: new number block
1374*/
1375
1376static omstr *
1377add_number(int n, omstr *after)
1378{
1379omstr *om = (omstr *)malloc(sizeof(omstr));
1380
1381if (om == NULL)
1382 {
1383 fprintf(stderr, "pcre2grep: malloc failed\n");
1384 pcre2grep_exit(2);
1385 }
1386om->next = NULL;
1387om->groupnum = n;
1388
1389if (after != NULL)
1390 {
1391 om->next = after->next;
1392 after->next = om;
1393 }
1394return om;
1395}
1396
1397
1398
1399/*************************************************
1400* Read one line of input *
1401*************************************************/
1402
1403/* Normally, input that is to be scanned is read using fread() (or gzread, or
1404BZ2_read) into a large buffer, so many lines may be read at once. However,
1405doing this for tty input means that no output appears until a lot of input has
1406been typed. Instead, tty input is handled line by line. We cannot use fgets()
1407for this, because it does not stop at a binary zero, and therefore there is no
1408way of telling how many characters it has read, because there may be binary
1409zeros embedded in the data. This function is also used for reading patterns
1410from files (the -f option).
1411
1412Arguments:
1413 buffer the buffer to read into
1414 length the maximum number of characters to read
1415 f the file
1416
1417Returns: the number of characters read, zero at end of file
1418*/
1419
1420static PCRE2_SIZE
1421read_one_line(char *buffer, int length, FILE *f)
1422{
1423int c;
1424int yield = 0;
1425while ((c = fgetc(f)) != EOF)
1426 {
1427 buffer[yield++] = c;
1428 if (c == '\n' || yield >= length) break;
1429 }
1430return yield;
1431}
1432
1433
1434
1435/*************************************************
1436* Find end of line *
1437*************************************************/
1438
1439/* The length of the endline sequence that is found is set via lenptr. This may
1440be zero at the very end of the file if there is no line-ending sequence there.
1441
1442Arguments:
1443 p current position in line
1444 endptr end of available data
1445 lenptr where to put the length of the eol sequence
1446
1447Returns: pointer after the last byte of the line,
1448 including the newline byte(s)
1449*/
1450
/* Scan forward from p for the end of the current line, according to the
global endlinetype setting. The length of the line ending that was found is
stored via lenptr (0 when the data ran out first), and the result points just
past that line ending, or is endptr when none was found. */

static char *
end_of_line(char *p, char *endptr, int *lenptr)
{
switch(endlinetype)
  {
  /* The first three cases differ only in the single byte that ends a line. */

  default:      /* Just in case */
  case PCRE2_NEWLINE_LF:
  while (p < endptr && *p != '\n') p++;
  if (p < endptr)
    {
    *lenptr = 1;
    return p + 1;
    }
  *lenptr = 0;
  return endptr;

  case PCRE2_NEWLINE_CR:
  while (p < endptr && *p != '\r') p++;
  if (p < endptr)
    {
    *lenptr = 1;
    return p + 1;
    }
  *lenptr = 0;
  return endptr;

  case PCRE2_NEWLINE_NUL:
  while (p < endptr && *p != '\0') p++;
  if (p < endptr)
    {
    *lenptr = 1;
    return p + 1;
    }
  *lenptr = 0;
  return endptr;

  /* Only the two-byte sequence CR LF ends a line. After each CR the next
  byte is inspected; if it is not LF the search for a CR resumes from that
  byte. A CR that is the last available byte is not treated as a line
  ending. */

  case PCRE2_NEWLINE_CRLF:
  for (;;)
    {
    while (p < endptr && *p != '\r') p++;
    if (++p >= endptr)
      {
      *lenptr = 0;
      return endptr;
      }
    if (*p == '\n')
      {
      *lenptr = 2;
      return p + 1;
      }
    }
  break;

  /* CR, LF, or CR LF ends a line. The data is stepped through one character
  at a time; when utf is set, a lead byte of 0xc0 or above is decoded together
  with its continuation bytes so that p moves past the whole character. */

  case PCRE2_NEWLINE_ANYCRLF:
  while (p < endptr)
    {
    int extra = 0;
    int c = *((unsigned char *)p);

    /* NOTE(review): the continuation bytes p[gcii] are read without being
    compared against endptr - presumably the data is expected to hold only
    complete UTF-8 sequences; confirm against the callers. */

    if (utf && c >= 0xc0)
      {
      int gcii, gcss;
      extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
      gcss = 6*extra;
      c = (c & utf8_table3[extra]) << gcss;
      for (gcii = 1; gcii <= extra; gcii++)
        {
        gcss -= 6;
        c |= (p[gcii] & 0x3f) << gcss;
        }
      }

    p += 1 + extra;   /* p now points after the character just examined */

    switch (c)
      {
      case '\n':
      *lenptr = 1;
      return p;

      case '\r':
      if (p < endptr && *p == '\n')
        {
        *lenptr = 2;
        p++;
        }
      else *lenptr = 1;
      return p;

      default:
      break;
      }
    }   /* End of loop for ANYCRLF case */

  *lenptr = 0;  /* Must have hit the end */
  return endptr;

  /* As for ANYCRLF, but VT and FF also end a line, as do NEL, LS and PS
  unless EBCDIC is defined. The lengths stored for NEL, LS and PS are the
  number of bytes that were consumed for the character. */

  case PCRE2_NEWLINE_ANY:
  while (p < endptr)
    {
    int extra = 0;
    int c = *((unsigned char *)p);

    if (utf && c >= 0xc0)
      {
      int gcii, gcss;
      extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
      gcss = 6*extra;
      c = (c & utf8_table3[extra]) << gcss;
      for (gcii = 1; gcii <= extra; gcii++)
        {
        gcss -= 6;
        c |= (p[gcii] & 0x3f) << gcss;
        }
      }

    p += 1 + extra;   /* p now points after the character just examined */

    switch (c)
      {
      case '\n':    /* LF */
      case '\v':    /* VT */
      case '\f':    /* FF */
      *lenptr = 1;
      return p;

      case '\r':    /* CR */
      if (p < endptr && *p == '\n')
        {
        *lenptr = 2;
        p++;
        }
      else *lenptr = 1;
      return p;

#ifndef EBCDIC
      case 0x85:    /* Unicode NEL */
      *lenptr = utf? 2 : 1;
      return p;

      case 0x2028:  /* Unicode LS */
      case 0x2029:  /* Unicode PS */
      *lenptr = 3;
      return p;
#endif  /* Not EBCDIC */

      default:
      break;
      }
    }   /* End of loop for ANY case */

  *lenptr = 0;  /* Must have hit the end */
  return endptr;
  }     /* End of overall switch */
}
1606
1607
1608
1609/*************************************************
1610* Find start of previous line *
1611*************************************************/
1612
1613/* This is called when looking back for before lines to print.
1614
1615Arguments:
1616 p start of the subsequent line
1617 startptr start of available data
1618
1619Returns: pointer to the start of the previous line
1620*/
1621
1622static char *
1623previous_line(char *p, char *startptr)
1624{
1625switch(endlinetype)
1626 {
1627 default: /* Just in case */
1628 case PCRE2_NEWLINE_LF:
1629 p--;
1630 while (p > startptr && p[-1] != '\n') p--;
1631 return p;
1632
1633 case PCRE2_NEWLINE_CR:
1634 p--;
1635 while (p > startptr && p[-1] != '\n') p--;
1636 return p;
1637
1638 case PCRE2_NEWLINE_NUL:
1639 p--;
1640 while (p > startptr && p[-1] != '\0') p--;
1641 return p;
1642
1643 case PCRE2_NEWLINE_CRLF:
1644 for (;;)
1645 {
1646 p -= 2;
1647 while (p > startptr && p[-1] != '\n') p--;
1648 if (p <= startptr + 1 || p[-2] == '\r') return p;
1649 }
1650 /* Control can never get here */
1651
1652 case PCRE2_NEWLINE_ANY:
1653 case PCRE2_NEWLINE_ANYCRLF:
1654 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1655 if (utf) while ((*p & 0xc0) == 0x80) p--;
1656
1657 while (p > startptr)
1658 {
1659 unsigned int c;
1660 char *pp = p - 1;
1661
1662 if (utf)
1663 {
1664 int extra = 0;
1665 while ((*pp & 0xc0) == 0x80) pp--;
1666 c = *((unsigned char *)pp);
1667 if (c >= 0xc0)
1668 {
1669 int gcii, gcss;
1670 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1671 gcss = 6*extra;
1672 c = (c & utf8_table3[extra]) << gcss;
1673 for (gcii = 1; gcii <= extra; gcii++)
1674 {
1675 gcss -= 6;
1676 c |= (pp[gcii] & 0x3f) << gcss;
1677 }
1678 }
1679 }
1680 else c = *((unsigned char *)pp);
1681
1682 if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1683 {
1684 case '\n': /* LF */
1685 case '\r': /* CR */
1686 return p;
1687
1688 default:
1689 break;
1690 }
1691
1692 else switch (c)
1693 {
1694 case '\n': /* LF */
1695 case '\v': /* VT */
1696 case '\f': /* FF */
1697 case '\r': /* CR */
1698#ifndef EBCDIC
1699 case 0x85: /* Unicode NEL */
1700 case 0x2028: /* Unicode LS */
1701 case 0x2029: /* Unicode PS */
1702#endif /* Not EBCDIC */
1703 return p;
1704
1705 default:
1706 break;
1707 }
1708
1709 p = pp; /* Back one character */
1710 } /* End of loop for ANY case */
1711
1712 return startptr; /* Hit start of data */
1713 } /* End of overall switch */
1714}
1715
1716
1717
1718/*************************************************
1719* Output newline at end *
1720*************************************************/
1721
1722/* This function is called if the final line of a file has been written to
1723stdout, but it does not have a terminating newline.
1724
1725Arguments: none
1726Returns: nothing
1727*/
1728
1729static void
1730write_final_newline(void)
1731{
1732switch(endlinetype)
1733 {
1734 default: /* Just in case */
1735 case PCRE2_NEWLINE_LF:
1736 case PCRE2_NEWLINE_ANY:
1737 case PCRE2_NEWLINE_ANYCRLF:
1738 fprintf(stdout, "\n");
1739 break;
1740
1741 case PCRE2_NEWLINE_CR:
1742 fprintf(stdout, "\r");
1743 break;
1744
1745 case PCRE2_NEWLINE_CRLF:
1746 fprintf(stdout, "\r\n");
1747 break;
1748
1749 case PCRE2_NEWLINE_NUL:
1750 fprintf(stdout, "%c", 0);
1751 break;
1752 }
1753}
1754
1755
1756/*************************************************
1757* Print the previous "after" lines *
1758*************************************************/
1759
1760/* This is called if we are about to lose said lines because of buffer filling,
1761and at the end of the file. The data in the line is written using fwrite() so
1762that a binary zero does not terminate it.
1763
1764Arguments:
1765 lastmatchnumber the number of the last matching line, plus one
1766 lastmatchrestart where we restarted after the last match
1767 endptr end of available data
1768 printname filename for printing
1769
1770Returns: nothing
1771*/
1772
1773static void
1774do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1775 char *endptr, const char *printname)
1776{
1777if (after_context > 0 && lastmatchnumber > 0)
1778 {
1779 int count = 0;
1780 int ellength = 0;
1781 while (lastmatchrestart < endptr && count < after_context)
1782 {
1783 char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1784 if (ellength == 0 && pp == main_buffer + bufsize) break;
1785 if (printname != NULL) fprintf(stdout, "%s-", printname);
1786 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1787 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1788 lastmatchrestart = pp;
1789 count++;
1790 }
1791
1792 /* If we have printed any lines, arrange for a hyphen separator if anything
1793 else follows. Also, if the last line is the final line in the file and it had
1794 no newline, add one. */
1795
1796 if (count > 0)
1797 {
1798 hyphenpending = TRUE;
1799 if (ellength == 0 && lastmatchrestart >= endptr)
1800 write_final_newline();
1801 }
1802 }
1803}
1804
1805
1806
1807/*************************************************
1808* Apply patterns to subject till one matches *
1809*************************************************/
1810
1811/* This function is called to run through all patterns, looking for a match. It
1812is used multiple times for the same subject when colouring is enabled, in order
1813to find all possible matches.
1814
1815Arguments:
1816 matchptr the start of the subject
1817 length the length of the subject to match
 options options for pcre2_match()
1819 startoffset where to start matching
1820 mrc address of where to put the result of pcre2_match()
1821
1822Returns: TRUE if there was a match
1823 FALSE if there was no match
1824 invert if there was a non-fatal error
1825*/
1826
1827static BOOL
1828match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1829 PCRE2_SIZE startoffset, int *mrc)
1830{
1831int i;
1832PCRE2_SIZE slen = length;
1833patstr *p = patterns;
1834const char *msg = "this text:\n\n";
1835
1836if (slen > 200)
1837 {
1838 slen = 200;
1839 msg = "text that starts:\n\n";
1840 }
1841
1842for (i = 1; p != NULL; p = p->next, i++)
1843 {
1844 *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1845 startoffset, options, match_data, match_context);
1846 if (*mrc >= 0) return TRUE;
1847 if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1848 fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1849 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1850 fprintf(stderr, "%s", msg);
1851 FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */
1852 fprintf(stderr, "\n\n");
1853 if (*mrc <= PCRE2_ERROR_UTF8_ERR1 &&
1854 *mrc >= PCRE2_ERROR_UTF8_ERR21)
1855 {
1856 unsigned char mbuffer[256];
1857 PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
1858 (void)pcre2_get_error_message(*mrc, mbuffer, sizeof(mbuffer));
1859 fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer,
1860 SIZ_CAST startchar);
1861 }
1862 if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
1863 *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1864 resource_error = TRUE;
1865 if (error_count++ > 20)
1866 {
1867 fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1868 pcre2grep_exit(2);
1869 }
1870 return invert; /* No more matching; don't show the line again */
1871 }
1872
1873return FALSE; /* No match, no errors */
1874}
1875
1876
1877
1878/*************************************************
1879* Decode dollar escape sequence *
1880*************************************************/
1881
1882/* Called from various places to decode $ escapes in output strings. The escape
1883sequences are as follows:
1884
1885$<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
1886zero is never returned; '0' is substituted.
1887
1888$a returns bell.
1889$b returns backspace.
1890$e returns escape.
1891$f returns form feed.
1892$n returns newline.
1893$r returns carriage return.
1894$t returns tab.
1895$v returns vertical tab.
1896$o<digits> returns the character represented by the given octal
1897 number; up to three digits are processed.
1898$o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
1899 code points.
1900$x<digits> returns the character represented by the given hexadecimal
1901 number; up to two digits are processed.
$x{<digits>} does the same, up to 6 digits, but gives an error for mode-invalid
1903 code points.
1904Any other character is substituted by itself. E.g: $$ is replaced by a single
1905dollar.
1906
1907Arguments:
1908 begin the start of the whole string
1909 string points to the $
1910 callout TRUE if in a callout (inhibits error messages)
1911 value where to return a value
1912 last where to return pointer to the last used character
1913
1914Returns: DDE_ERROR after a syntax error
1915 DDE_CAPTURE if *value is a capture number
1916 DDE_CHAR if *value is a character code
1917*/
1918
1919static int
1920decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
1921 uint32_t *value, PCRE2_SPTR *last)
1922{
1923uint32_t c = 0;
1924int base = 10;
1925int dcount;
1926int rc = DDE_CHAR;
1927BOOL brace = FALSE;
1928
1929switch (*(++string))
1930 {
1931 case 0: /* Syntax error: a character must be present after $. */
1932 if (!callout)
1933 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1934 (int)(string - begin), "no character after $");
1935 *last = string;
1936 return DDE_ERROR;
1937
1938 case '{':
1939 brace = TRUE;
1940 string++;
1941 if (!isdigit(*string)) /* Syntax error: a decimal number required. */
1942 {
1943 if (!callout)
1944 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1945 (int)(string - begin), "decimal number expected");
1946 rc = DDE_ERROR;
1947 break;
1948 }
1949
1950 /* Fall through */
1951
1952 /* The maximum capture number is 65535, so any number greater than that will
1953 always be an unknown capture number. We just stop incrementing, in order to
1954 avoid overflow. */
1955
1956 case '0': case '1': case '2': case '3': case '4':
1957 case '5': case '6': case '7': case '8': case '9':
1958 do
1959 {
1960 if (c <= 65535) c = c * 10 + (*string - '0');
1961 string++;
1962 }
1963 while (*string >= '0' && *string <= '9');
1964 string--; /* Point to last digit */
1965
1966 /* In a callout, capture number 0 is not available. No error can be given,
1967 so just return the character '0'. */
1968
1969 if (callout && c == 0)
1970 {
1971 *value = '0';
1972 }
1973 else
1974 {
1975 *value = c;
1976 rc = DDE_CAPTURE;
1977 }
1978 break;
1979
1980 /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
1981 for valid Unicode code points. */
1982
1983 case 'o':
1984 base = 8;
1985 string++;
1986 if (*string == '{')
1987 {
1988 brace = TRUE;
1989 string++;
1990 dcount = 7;
1991 }
1992 else dcount = 3;
1993 for (; dcount > 0; dcount--)
1994 {
1995 if (*string < '0' || *string > '7') break;
1996 c = c * 8 + (*string++ - '0');
1997 }
1998 *value = c;
1999 string--; /* Point to last digit */
2000 break;
2001
2002 /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
2003 for valid Unicode code points. */
2004
2005 case 'x':
2006 base = 16;
2007 string++;
2008 if (*string == '{')
2009 {
2010 brace = TRUE;
2011 string++;
2012 dcount = 6;
2013 }
2014 else dcount = 2;
2015 for (; dcount > 0; dcount--)
2016 {
2017 if (!isxdigit(*string)) break;
2018 if (*string >= '0' && *string <= '9')
2019 c = c *16 + *string++ - '0';
2020 else
2021 c = c * 16 + (*string++ | 0x20) - 'a' + 10;
2022 }
2023 *value = c;
2024 string--; /* Point to last digit */
2025 break;
2026
2027 case 'a': *value = '\a'; break;
2028 case 'b': *value = '\b'; break;
2029#ifndef EBCDIC
2030 case 'e': *value = '\033'; break;
2031#else
2032 case 'e': *value = '\047'; break;
2033#endif
2034 case 'f': *value = '\f'; break;
2035 case 'n': *value = STDOUT_NL_CODE; break;
2036 case 'r': *value = '\r'; break;
2037 case 't': *value = '\t'; break;
2038 case 'v': *value = '\v'; break;
2039
2040 default: *value = *string; break;
2041 }
2042
2043if (brace)
2044 {
2045 c = string[1];
2046 if (c != '}')
2047 {
2048 rc = DDE_ERROR;
2049 if (!callout)
2050 {
2051 if ((base == 8 && c >= '0' && c <= '7') ||
2052 (base == 16 && isxdigit(c)))
2053 {
2054 fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2055 "too many %s digits\n", (int)(string - begin),
2056 (base == 8)? "octal" : "hex");
2057 }
2058 else
2059 {
2060 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2061 (int)(string - begin), "missing closing brace");
2062 }
2063 }
2064 }
2065 else string++;
2066 }
2067
2068/* Check maximum code point values, but take note of STDOUT_NL_CODE. */
2069
2070if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
2071 {
2072 uint32_t max = utf? 0x0010ffffu : 0xffu;
2073 if (*value > max)
2074 {
2075 if (!callout)
2076 fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2077 "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
2078 rc = DDE_ERROR;
2079 }
2080 }
2081
2082*last = string;
2083return rc;
2084}
2085
2086
2087
2088/*************************************************
2089* Check output text for errors *
2090*************************************************/
2091
2092/* Called early, to get errors before doing anything for -O text; also called
2093from callouts to check before outputting.
2094
2095Arguments:
2096 string an --output text string
2097 callout TRUE if in a callout (stops printing errors)
2098
2099Returns: TRUE if OK, FALSE on error
2100*/
2101
2102static BOOL
2103syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
2104{
2105uint32_t value;
2106PCRE2_SPTR begin = string;
2107
2108for (; *string != 0; string++)
2109 {
2110 if (*string == '$' &&
2111 decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
2112 return FALSE;
2113 }
2114
2115return TRUE;
2116}
2117
2118
2119/*************************************************
2120* Display output text *
2121*************************************************/
2122
2123/* Display the output text, which is assumed to have already been syntax
2124checked. Output may contain escape sequences started by the dollar sign.
2125
2126Arguments:
2127 string: the output text
2128 callout: TRUE for the builtin callout, FALSE for --output
2129 subject the start of the subject
2130 ovector: capture offsets
2131 capture_top: number of captures
2132
2133Returns: TRUE if something was output, other than newline
2134 FALSE if nothing was output, or newline was last output
2135*/
2136
2137static BOOL
2138display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
2139 PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
2140{
2141uint32_t value;
2142BOOL printed = FALSE;
2143PCRE2_SPTR begin = string;
2144
2145for (; *string != 0; string++)
2146 {
2147 if (*string == '$')
2148 {
2149 switch(decode_dollar_escape(begin, string, callout, &value, &string))
2150 {
2151 case DDE_CHAR:
2152 if (value == STDOUT_NL_CODE)
2153 {
2154 fprintf(stdout, STDOUT_NL);
2155 printed = FALSE;
2156 continue;
2157 }
2158 break; /* Will print value */
2159
2160 case DDE_CAPTURE:
2161 if (value < capture_top)
2162 {
2163 PCRE2_SIZE capturesize;
2164 value *= 2;
2165 capturesize = ovector[value + 1] - ovector[value];
2166 if (capturesize > 0)
2167 {
2168 print_match(subject + ovector[value], capturesize);
2169 printed = TRUE;
2170 }
2171 }
2172 continue;
2173
2174 default: /* Should not occur */
2175 break;
2176 }
2177 }
2178
2179 else value = *string; /* Not a $ escape */
2180
2181 if (utf && value <= 127) fprintf(stdout, "%c", *string); else
2182 {
2183 int i;
2184 int n = ord2utf8(value);
2185 for (i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
2186 }
2187
2188 printed = TRUE;
2189 }
2190
2191return printed;
2192}
2193
2194
2195#ifdef SUPPORT_PCRE2GREP_CALLOUT
2196
2197/*************************************************
2198* Parse and execute callout scripts *
2199*************************************************/
2200
2201/* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2202string block and executes the program specified by the string. The string is a
2203list of substrings separated by pipe characters. The first substring represents
2204the executable name, and the following substrings specify the arguments:
2205
2206 program_name|param1|param2|...
2207
2208Any substring (including the program name) can contain escape sequences
2209started by the dollar character. The escape sequences are substituted as
2210follows:
2211
2212 $<digits> or ${<digits>} is replaced by the captured substring of the given
2213 decimal number, which must be greater than zero. If the number is greater
2214 than the number of capturing substrings, or if the capture is unset, the
2215 replacement is empty.
2216
2217 Any other character is substituted by itself. E.g: $$ is replaced by a single
2218 dollar or $| replaced by a pipe character.
2219
2220Alternatively, if string starts with pipe, the remainder is taken as an output
2221string, same as --output. This is the only form that is supported if
2222SUPPORT_PCRE2GREP_FORK is not defined. In this case, --om-separator is used to
2223separate each callout, defaulting to newline.
2224
2225Example:
2226
2227 echo -e "abcde\n12345" | pcre2grep \
2228 '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2229
2230 Output:
2231
2232 Arg1: [a] [bcd] [d] Arg2: |a| ()
2233 abcde
2234 Arg1: [1] [234] [4] Arg2: |1| ()
2235 12345
2236
2237Arguments:
2238 blockptr the callout block
2239
2240Returns: currently it always returns with 0
2241*/
2242
2243static int
2244pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2245{
2246PCRE2_SIZE length = calloutptr->callout_string_length;
2247PCRE2_SPTR string = calloutptr->callout_string;
2248PCRE2_SPTR subject = calloutptr->subject;
2249PCRE2_SIZE *ovector = calloutptr->offset_vector;
2250PCRE2_SIZE capture_top = calloutptr->capture_top;
2251
2252#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2253PCRE2_SIZE argsvectorlen = 2;
2254PCRE2_SIZE argslen = 1;
2255char *args;
2256char *argsptr;
2257char **argsvector;
2258char **argsvectorptr;
2259#ifndef WIN32
2260pid_t pid;
2261#endif
2262int result = 0;
2263#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2264
2265(void)unused; /* Avoid compiler warning */
2266
2267/* Only callouts with strings are supported. */
2268
2269if (string == NULL || length == 0) return 0;
2270
2271/* If there's no command, output the remainder directly. */
2272
2273if (*string == '|')
2274 {
2275 string++;
2276 if (!syntax_check_output_text(string, TRUE)) return 0;
2277 (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2278 return 0;
2279 }
2280
2281#ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2282return 0;
2283#else
2284
2285/* Checking syntax and compute the number of string fragments. Callout strings
2286are silently ignored in the event of a syntax error. */
2287
2288while (length > 0)
2289 {
2290 if (*string == '|')
2291 {
2292 argsvectorlen++;
2293 if (argsvectorlen > 10000) return 0; /* Too many args */
2294 }
2295
2296 else if (*string == '$')
2297 {
2298 uint32_t value;
2299 PCRE2_SPTR begin = string;
2300
2301 switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2302 {
2303 case DDE_CAPTURE:
2304 if (value < capture_top)
2305 {
2306 value *= 2;
2307 argslen += ovector[value + 1] - ovector[value];
2308 }
2309 argslen--; /* Negate the effect of argslen++ below. */
2310 break;
2311
2312 case DDE_CHAR:
2313 if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
2314 else if (utf && value > 127) argslen += ord2utf8(value) - 1;
2315 break;
2316
2317 default: /* Should not occur */
2318 case DDE_ERROR:
2319 return 0;
2320 }
2321
2322 length -= (string - begin);
2323 }
2324
2325 string++;
2326 length--;
2327 argslen++;
2328 }
2329
2330/* Get memory for the argument vector and its strings. */
2331
2332args = (char*)malloc(argslen);
2333if (args == NULL) return 0;
2334
2335argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2336if (argsvector == NULL)
2337 {
2338 free(args);
2339 return 0;
2340 }
2341
2342/* Now reprocess the string and set up the arguments. */
2343
2344argsptr = args;
2345argsvectorptr = argsvector;
2346*argsvectorptr++ = argsptr;
2347
2348length = calloutptr->callout_string_length;
2349string = calloutptr->callout_string;
2350
2351while (length > 0)
2352 {
2353 if (*string == '|')
2354 {
2355 *argsptr++ = '\0';
2356 *argsvectorptr++ = argsptr;
2357 }
2358
2359 else if (*string == '$')
2360 {
2361 uint32_t value;
2362 PCRE2_SPTR begin = string;
2363
2364 switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2365 {
2366 case DDE_CAPTURE:
2367 if (value < capture_top)
2368 {
2369 PCRE2_SIZE capturesize;
2370 value *= 2;
2371 capturesize = ovector[value + 1] - ovector[value];
2372 memcpy(argsptr, subject + ovector[value], capturesize);
2373 argsptr += capturesize;
2374 }
2375 break;
2376
2377 case DDE_CHAR:
2378 if (value == STDOUT_NL_CODE)
2379 {
2380 memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
2381 argsptr += STDOUT_NL_LEN;
2382 }
2383 else if (utf && value > 127)
2384 {
2385 int n = ord2utf8(value);
2386 memcpy(argsptr, utf8_buffer, n);
2387 argsptr += n;
2388 }
2389 else
2390 {
2391 *argsptr++ = value;
2392 }
2393 break;
2394
2395 default: /* Even though this should not occur, the string having */
2396 case DDE_ERROR: /* been checked above, we need to include the free() */
2397 free(args); /* calls so that source checkers do not complain. */
2398 free(argsvector);
2399 return 0;
2400 }
2401
2402 length -= (string - begin);
2403 }
2404
2405 else *argsptr++ = *string;
2406
2407 /* Advance along the string */
2408
2409 string++;
2410 length--;
2411 }
2412
2413*argsptr++ = '\0';
2414*argsvectorptr = NULL;
2415
2416/* Running an external command is system-dependent. Handle Windows and VMS as
2417necessary, otherwise assume fork(). */
2418
2419#ifdef WIN32
2420result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2421
2422#elif defined __VMS
2423 {
2424 char cmdbuf[500];
2425 short i = 0;
2426 int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2427 $DESCRIPTOR(cmd, cmdbuf);
2428
2429 cmdbuf[0] = 0;
2430 while (argsvector[i])
2431 {
2432 strcat(cmdbuf, argsvector[i]);
2433 strcat(cmdbuf, " ");
2434 i++;
2435 }
2436 cmd.dsc$w_length = strlen(cmdbuf) - 1;
2437 status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2438 if (!(status & 1)) result = 0;
2439 else result = retstat & 1 ? 0 : 1;
2440 }
2441
2442#else /* Neither Windows nor VMS */
2443pid = fork();
2444if (pid == 0)
2445 {
2446 (void)execv(argsvector[0], argsvector);
2447 /* Control gets here if there is an error, e.g. a non-existent program */
2448 exit(1);
2449 }
2450else if (pid > 0)
2451 (void)waitpid(pid, &result, 0);
2452#endif /* End Windows/VMS/other handling */
2453
2454free(args);
2455free(argsvector);
2456
2457/* Currently negative return values are not supported, only zero (match
2458continues) or non-zero (match fails). */
2459
2460return result != 0;
2461#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2462}
2463#endif /* SUPPORT_PCRE2GREP_CALLOUT */
2464
2465
2466
2467/*************************************************
2468* Read a portion of the file into buffer *
2469*************************************************/
2470
2471static int
2472fill_buffer(void *handle, int frtype, char *buffer, int length,
2473 BOOL input_line_buffered)
2474{
2475(void)frtype; /* Avoid warning when not used */
2476
2477#ifdef SUPPORT_LIBZ
2478if (frtype == FR_LIBZ)
2479 return gzread((gzFile)handle, buffer, length);
2480else
2481#endif
2482
2483#ifdef SUPPORT_LIBBZ2
2484if (frtype == FR_LIBBZ2)
2485 return BZ2_bzread((BZFILE *)handle, buffer, length);
2486else
2487#endif
2488
2489return (input_line_buffered ?
2490 read_one_line(buffer, length, (FILE *)handle) :
2491 fread(buffer, 1, length, (FILE *)handle));
2492}
2493
2494
2495
2496/*************************************************
2497* Grep an individual file *
2498*************************************************/
2499
2500/* This is called from grep_or_recurse() below. It uses a buffer that is three
2501times the value of bufthird. The matching point is never allowed to stray into
2502the top third of the buffer, thus keeping more of the file available for
2503context printing or for multiline scanning. For large files, the pointer will
2504be in the middle third most of the time, so the bottom third is available for
2505"before" context printing.
2506
2507Arguments:
2508 handle the fopened FILE stream for a normal file
2509 the gzFile pointer when reading is via libz
2510 the BZFILE pointer when reading is via libbz2
2511 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2512 filename the file name or NULL (for errors)
2513 printname the file name if it is to be printed for each match
2514 or NULL if the file name is not to be printed
2515 it cannot be NULL if filenames[_nomatch]_only is set
2516
2517Returns: 0 if there was at least one match
2518 1 otherwise (no matches)
2519 2 if an overlong line is encountered
2520 3 if there is a read error on a .bz2 file
2521*/
2522
/* Scan one already-opened input, line by line, through the shared main_buffer,
printing whatever the current options call for. */
2523static int
2524pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2525{
/* rc is the eventual return value and stays at 1 until something succeeds.
filepos is advanced by the length of each line as ptr moves on, and linenumber
(starting at 1) is incremented with it. */
2526int rc = 1;
2527int filepos = 0;
2528unsigned long int linenumber = 1;
2529unsigned long int lastmatchnumber = 0;
2530unsigned long int count = 0;
2531long int count_matched_lines = 0;
2532char *lastmatchrestart = main_buffer;
2533char *ptr = main_buffer;
2534char *endptr;
2535PCRE2_SIZE bufflength;
2536BOOL binary = FALSE;
2537BOOL endhyphenpending = FALSE;
2538BOOL lines_printed = FALSE;
2539BOOL input_line_buffered = line_buffered;
2540FILE *in = NULL; /* Ensure initialized */
2541
2542/* Do the first read into the start of the buffer and set up the pointer to end
2543of what we have. In the case of libz, a non-zipped .gz file will be read as a
2544plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2545fail. */
2546
2547if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2548 {
2549 in = (FILE *)handle;
2550 if (is_file_tty(in)) input_line_buffered = TRUE;
2551 }
2552else input_line_buffered = FALSE;
2553
2554bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2555 input_line_buffered);
2556
2557#ifdef SUPPORT_LIBBZ2
/* NOTE(review): the header comment for this function documents a return value
of 3 for a read error on a .bz2 file, but the value returned here is 2, and no
path in this function returns 3. Confirm which is intended against the caller. */
2558if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE */
2559#endif
2560
2561endptr = main_buffer + bufflength;
2562
2563/* Unless binary-files=text, see if we have a binary file. This uses the same
2564rule as GNU grep, namely, a search for a binary zero byte near the start of the
2565file. However, when the newline convention is binary zero, we can't do this. */
2566
2567if (binary_files != BIN_TEXT)
2568 {
2569 if (endlinetype != PCRE2_NEWLINE_NUL)
2570 binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2571 != NULL;
2572 if (binary && binary_files == BIN_NOMATCH) return 1;
2573 }
2574
2575/* Loop while the current pointer is not at the end of the file. For large
2576files, endptr will be at the end of the buffer when we are in the middle of the
2577file, but ptr will never get there, because as soon as it gets over 2/3 of the
2578way, the buffer is shifted left and re-filled. */
2579
2580while (ptr < endptr)
2581 {
2582 int endlinelength;
2583 int mrc = 0;
2584 unsigned int options = 0;
2585 BOOL match;
2586 BOOL line_matched = FALSE;
2587 char *t = ptr;
2588 PCRE2_SIZE length, linelength;
2589 PCRE2_SIZE startoffset = 0;
2590
2591 /* If the -m option set a limit for the number of matched or non-matched
2592 lines, check it here. A limit of zero means that no matching is ever done.
2593 For stdin from a file, set the file position. */
2594
2595 if (count_limit >= 0 && count_matched_lines >= count_limit)
2596 {
2597 if (frtype == FR_PLAIN && filename == stdin_name && !is_file_tty(handle))
2598 (void)fseek(handle, (long int)filepos, SEEK_SET);
 /* With a zero limit the result is forced to 1; otherwise it is forced to 0. */
2599 rc = (count_limit == 0)? 1 : 0;
2600 break;
2601 }
2602
2603 /* At this point, ptr is at the start of a line. We need to find the length
2604 of the subject string to pass to pcre2_match(). In multiline mode, it is the
2605 length remainder of the data in the buffer. Otherwise, it is the length of
2606 the next line, excluding the terminating newline. After matching, we always
2607 advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2608 option is used for compiling, so that any match is constrained to be in the
2609 first line. */
2610
2611 t = end_of_line(t, endptr, &endlinelength);
2612 linelength = t - ptr - endlinelength;
2613 length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2614
2615 /* Check to see if the line we are looking at extends right to the very end
2616 of the buffer without a line terminator. This means the line is too long to
2617 handle at the current buffer size. Until the buffer reaches its maximum size,
2618 try doubling it and reading more data. */
2619
2620 if (endlinelength == 0 && t == main_buffer + bufsize)
2621 {
2622 if (bufthird < max_bufthird)
2623 {
2624 char *new_buffer;
2625 int new_bufthird = 2*bufthird;
2626
2627 if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2628 new_buffer = (char *)malloc(3*new_bufthird);
2629
2630 if (new_buffer == NULL)
2631 {
2632 fprintf(stderr,
2633 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2634 "pcre2grep: not enough memory to increase the buffer size to %d\n",
2635 linenumber,
2636 (filename == NULL)? "" : " of file ",
2637 (filename == NULL)? "" : filename,
2638 new_bufthird);
2639 return 2;
2640 }
2641
2642 /* Copy the data and adjust pointers to the new buffer location. */
2643
2644 memcpy(new_buffer, main_buffer, bufsize);
2645 bufthird = new_bufthird;
2646 bufsize = 3*bufthird;
2647 ptr = new_buffer + (ptr - main_buffer);
2648 lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2649 free(main_buffer);
2650 main_buffer = new_buffer;
2651
2652 /* Read more data into the buffer and then try to find the line ending
2653 again. */
2654
2655 bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2656 bufsize - bufflength, input_line_buffered);
2657 endptr = main_buffer + bufflength;
2658 continue;
2659 }
2660 else
2661 {
2662 fprintf(stderr,
2663 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2664 "pcre2grep: the maximum buffer size is %d\n"
2665 "pcre2grep: use the --max-buffer-size option to change it\n",
2666 linenumber,
2667 (filename == NULL)? "" : " of file ",
2668 (filename == NULL)? "" : filename,
2669 bufthird);
2670 return 2;
2671 }
2672 }
2673
2674 /* Extra processing for Jeffrey Friedl's debugging. */
2675
2676#ifdef JFRIEDL_DEBUG
2677 if (jfriedl_XT || jfriedl_XR)
2678 {
2679# include <sys/time.h>
2680# include <time.h>
2681 struct timeval start_time, end_time;
2682 struct timezone dummy;
2683 int i;
2684
2685 if (jfriedl_XT)
2686 {
2687 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
2688 const char *orig = ptr;
2689 ptr = malloc(newlen + 1);
2690 if (!ptr) {
2691 printf("out of memory");
2692 pcre2grep_exit(2);
2693 }
2694 endptr = ptr;
2695 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
2696 for (i = 0; i < jfriedl_XT; i++) {
2697 strncpy(endptr, orig, length);
2698 endptr += length;
2699 }
2700 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
2701 length = newlen;
2702 }
2703
2704 if (gettimeofday(&start_time, &dummy) != 0)
2705 perror("bad gettimeofday");
2706
2707
2708 for (i = 0; i < jfriedl_XR; i++)
2709 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
2710 PCRE2_NOTEMPTY, offsets, offset_size) >= 0);
2711
2712 if (gettimeofday(&end_time, &dummy) != 0)
2713 perror("bad gettimeofday");
2714
2715 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
2716 -
2717 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
2718
2719 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
2720 return 0;
2721 }
2722#endif
2723
2724 /* We come back here after a match when only_matching_count is non-zero, in
2725 order to find any further matches in the same line. This applies to
2726 --only-matching, --file-offsets, and --line-offsets. */
2727
2728 ONLY_MATCHING_RESTART:
2729
2730 /* Run through all the patterns until one matches or there is an error other
2731 than NOMATCH. This code is in a subroutine so that it can be re-used for
2732 finding subsequent matches when colouring matched lines. After finding one
2733 match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2734 this line. */
2735
2736 match = match_patterns(ptr, length, options, startoffset, &mrc);
2737 options = PCRE2_NOTEMPTY;
2738
2739 /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2740 only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2741 return code - to output data lines, so that binary zeroes are treated as just
2742 another data character. */
2743
2744 if (match != invert)
2745 {
2746 BOOL hyphenprinted = FALSE;
2747
2748 /* We've failed if we want a file that doesn't have any matches. */
2749
2750 if (filenames == FN_NOMATCH_ONLY) return 1;
2751
2752 /* Remember that this line matched (for counting matched lines) */
2753
2754 line_matched = TRUE;
2755
2756 /* If all we want is a yes/no answer, we can return immediately. */
2757
2758 if (quiet) return 0;
2759
2760 /* Just count if just counting is wanted. */
2761
2762 else if (count_only || show_total_count) count++;
2763
2764 /* When handling a binary file and binary-files==binary, the "binary"
2765 variable will be set true (it's false in all other cases). In this
2766 situation we just want to output the file name. No need to scan further. */
2767
2768 else if (binary)
2769 {
2770 fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2771 return 0;
2772 }
2773
2774 /* Likewise, if all we want is a file name, there is no need to scan any
2775 more lines in the file. */
2776
2777 else if (filenames == FN_MATCH_ONLY)
2778 {
2779 fprintf(stdout, "%s" STDOUT_NL, printname);
2780 return 0;
2781 }
2782
2783 /* The --only-matching option prints just the substring that matched,
2784 and/or one or more captured portions of it, as long as these strings are
2785 not empty. The --file-offsets and --line-offsets options output offsets for
2786 the matching substring (all three set only_matching_count non-zero). None
2787 of these mutually exclusive options prints any context. Afterwards, adjust
2788 the start and then jump back to look for further matches in the same line.
2789 If we are in invert mode, however, nothing is printed and we do not restart
2790 - this could still be useful because the return code is set. */
2791
2792 else if (only_matching_count != 0)
2793 {
2794 if (!invert)
2795 {
2796 PCRE2_SIZE oldstartoffset;
2797
2798 if (printname != NULL) fprintf(stdout, "%s:", printname);
2799 if (number) fprintf(stdout, "%lu:", linenumber);
2800
2801 /* Handle --line-offsets */
2802
 /* In the expression below, "ptr + offsets[0] - ptr" reduces to offsets[0]:
 the value printed is the match offset relative to ptr, followed by the
 length of the match. */
2803 if (line_offsets)
2804 fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2805 (int)(offsets[1] - offsets[0]));
2806
2807 /* Handle --file-offsets */
2808
2809 else if (file_offsets)
2810 fprintf(stdout, "%d,%d" STDOUT_NL,
2811 (int)(filepos + ptr + offsets[0] - ptr),
2812 (int)(offsets[1] - offsets[0]));
2813
2814 /* Handle --output (which has already been syntax checked) */
2815
2816 else if (output_text != NULL)
2817 {
2818 if (display_output_text((PCRE2_SPTR)output_text, FALSE,
2819 (PCRE2_SPTR)ptr, offsets, mrc) || printname != NULL ||
2820 number)
2821 fprintf(stdout, STDOUT_NL);
2822 }
2823
2824 /* Handle --only-matching, which may occur many times */
2825
2826 else
2827 {
2828 BOOL printed = FALSE;
2829 omstr *om;
2830
2831 for (om = only_matching; om != NULL; om = om->next)
2832 {
2833 int n = om->groupnum;
2834 if (n == 0 || n < mrc)
2835 {
2836 int plen = offsets[2*n + 1] - offsets[2*n];
2837 if (plen > 0)
2838 {
2839 if (printed && om_separator != NULL)
2840 fprintf(stdout, "%s", om_separator);
2841 print_match(ptr + offsets[n*2], plen);
2842 printed = TRUE;
2843 }
2844 }
2845 }
2846
2847 if (printed || printname != NULL || number)
2848 fprintf(stdout, STDOUT_NL);
2849 }
2850
2851 /* Prepare to repeat to find the next match in the line. */
2852
2853 match = FALSE;
2854 if (line_buffered) fflush(stdout);
2855 rc = 0; /* Had some success */
2856
2857 /* If the pattern contained a lookbehind that included \K, it is
2858 possible that the end of the match might be at or before the actual
2859 starting offset we have just used. In this case, start one character
2860 further on. */
2861
2862 startoffset = offsets[1]; /* Restart after the match */
2863 oldstartoffset = pcre2_get_startchar(match_data);
2864 if (startoffset <= oldstartoffset)
2865 {
2866 if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
2867 startoffset = oldstartoffset + 1;
2868 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2869 }
2870
2871 /* If the current match ended past the end of the line (only possible
2872 in multiline mode), we must move on to the line in which it did end
2873 before searching for more matches. */
2874
2875 while (startoffset > linelength)
2876 {
2877 ptr += linelength + endlinelength;
2878 filepos += (int)(linelength + endlinelength);
2879 linenumber++;
2880 startoffset -= (int)(linelength + endlinelength);
2881 t = end_of_line(ptr, endptr, &endlinelength);
2882 linelength = t - ptr - endlinelength;
2883 length = (PCRE2_SIZE)(endptr - ptr);
2884 }
2885
2886 goto ONLY_MATCHING_RESTART;
2887 }
2888 }
2889
2890 /* This is the default case when none of the above options is set. We print
2891 the matching lines(s), possibly preceded and/or followed by other lines of
2892 context. */
2893
2894 else
2895 {
2896 lines_printed = TRUE;
2897
2898 /* See if there is a requirement to print some "after" lines from a
2899 previous match. We never print any overlaps. */
2900
2901 if (after_context > 0 && lastmatchnumber > 0)
2902 {
2903 int ellength;
2904 int linecount = 0;
2905 char *p = lastmatchrestart;
2906
2907 while (p < ptr && linecount < after_context)
2908 {
2909 p = end_of_line(p, ptr, &ellength);
2910 linecount++;
2911 }
2912
2913 /* It is important to advance lastmatchrestart during this printing so
2914 that it interacts correctly with any "before" printing below. Print
2915 each line's data using fwrite() in case there are binary zeroes. */
2916
2917 while (lastmatchrestart < p)
2918 {
2919 char *pp = lastmatchrestart;
2920 if (printname != NULL) fprintf(stdout, "%s-", printname);
2921 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2922 pp = end_of_line(pp, endptr, &ellength);
2923 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2924 lastmatchrestart = pp;
2925 }
2926 if (lastmatchrestart != ptr) hyphenpending = TRUE;
2927 }
2928
2929 /* If there were non-contiguous lines printed above, insert hyphens. */
2930
2931 if (hyphenpending)
2932 {
2933 fprintf(stdout, "--" STDOUT_NL);
2934 hyphenpending = FALSE;
2935 hyphenprinted = TRUE;
2936 }
2937
2938 /* See if there is a requirement to print some "before" lines for this
2939 match. Again, don't print overlaps. */
2940
2941 if (before_context > 0)
2942 {
2943 int linecount = 0;
2944 char *p = ptr;
2945
2946 while (p > main_buffer &&
2947 (lastmatchnumber == 0 || p > lastmatchrestart) &&
2948 linecount < before_context)
2949 {
2950 linecount++;
2951 p = previous_line(p, main_buffer);
2952 }
2953
2954 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2955 fprintf(stdout, "--" STDOUT_NL);
2956
2957 while (p < ptr)
2958 {
2959 int ellength;
2960 char *pp = p;
2961 if (printname != NULL) fprintf(stdout, "%s-", printname);
2962 if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2963 pp = end_of_line(pp, endptr, &ellength);
2964 FWRITE_IGNORE(p, 1, pp - p, stdout);
2965 p = pp;
2966 }
2967 }
2968
2969 /* Now print the matching line(s); ensure we set hyphenpending at the end
2970 of the file if any context lines are being output. */
2971
2972 if (after_context > 0 || before_context > 0)
2973 endhyphenpending = TRUE;
2974
2975 if (printname != NULL) fprintf(stdout, "%s:", printname);
2976 if (number) fprintf(stdout, "%lu:", linenumber);
2977
2978 /* This extra option, for Jeffrey Friedl's debugging requirements,
2979 replaces the matched string, or a specific captured string if it exists,
2980 with X. When this happens, colouring is ignored. */
2981
2982#ifdef JFRIEDL_DEBUG
2983 if (S_arg >= 0 && S_arg < mrc)
2984 {
2985 int first = S_arg * 2;
2986 int last = first + 1;
2987 FWRITE_IGNORE(ptr, 1, offsets[first], stdout);
2988 fprintf(stdout, "X");
2989 FWRITE_IGNORE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
2990 }
2991 else
2992#endif
2993
2994 /* In multiline mode, or if colouring, we have to split the line(s) up
2995 and search for further matches, but not of course if the line is a
2996 non-match. In multiline mode this is necessary in case there is another
2997 match that spans the end of the current line. When colouring we want to
2998 colour all matches. */
2999
3000 if ((multiline || do_colour) && !invert)
3001 {
3002 int plength;
3003 PCRE2_SIZE endprevious;
3004
3005 /* The use of \K may make the end offset earlier than the start. In
3006 this situation, swap them round. */
3007
3008 if (offsets[0] > offsets[1])
3009 {
3010 PCRE2_SIZE temp = offsets[0];
3011 offsets[0] = offsets[1];
3012 offsets[1] = temp;
3013 }
3014
3015 FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
3016 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3017
3018 for (;;)
3019 {
3020 PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
3021
3022 endprevious = offsets[1];
3023 startoffset = endprevious; /* Advance after previous match. */
3024
3025 /* If the pattern contained a lookbehind that included \K, it is
3026 possible that the end of the match might be at or before the actual
3027 starting offset we have just used. In this case, start one character
3028 further on. */
3029
3030 if (startoffset <= oldstartoffset)
3031 {
3032 startoffset = oldstartoffset + 1;
3033 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
3034 }
3035
3036 /* If the current match ended past the end of the line (only possible
3037 in multiline mode), we must move on to the line in which it did end
3038 before searching for more matches. Because the PCRE2_FIRSTLINE option
3039 is set, the start of the match will always be before the first
3040 newline sequence. */
3041
3042 while (startoffset > linelength + endlinelength)
3043 {
3044 ptr += linelength + endlinelength;
3045 filepos += (int)(linelength + endlinelength);
3046 linenumber++;
3047 startoffset -= (int)(linelength + endlinelength);
3048 endprevious -= (int)(linelength + endlinelength);
3049 t = end_of_line(ptr, endptr, &endlinelength);
3050 linelength = t - ptr - endlinelength;
3051 length = (PCRE2_SIZE)(endptr - ptr);
3052 }
3053
3054 /* If startoffset is at the exact end of the line it means this
3055 complete line was the final part of the match, so there is nothing
3056 more to do. */
3057
3058 if (startoffset == linelength + endlinelength) break;
3059
3060 /* Otherwise, run a match from within the final line, and if found,
3061 loop for any that may follow. */
3062
3063 if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
3064
3065 /* The use of \K may make the end offset earlier than the start. In
3066 this situation, swap them round. */
3067
3068 if (offsets[0] > offsets[1])
3069 {
3070 PCRE2_SIZE temp = offsets[0];
3071 offsets[0] = offsets[1];
3072 offsets[1] = temp;
3073 }
3074
3075 FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
3076 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3077 }
3078
3079 /* In multiline mode, we may have already printed the complete line
3080 and its line-ending characters (if they matched the pattern), so there
3081 may be no more to print. */
3082
3083 plength = (int)((linelength + endlinelength) - endprevious);
3084 if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
3085 }
3086
3087 /* Not colouring or multiline; no need to search for further matches. */
3088
3089 else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
3090 }
3091
3092 /* End of doing what has to be done for a match. If --line-buffered was
3093 given, flush the output. */
3094
3095 if (line_buffered) fflush(stdout);
3096 rc = 0; /* Had some success */
3097
3098 /* Remember where the last match happened for after_context. We remember
3099 where we are about to restart, and that line's number. */
3100
3101 lastmatchrestart = ptr + linelength + endlinelength;
3102 lastmatchnumber = linenumber + 1;
3103
3104 /* If a line was printed and we are now at the end of the file and the last
3105 line had no newline, output one. */
3106
3107 if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
3108 write_final_newline();
3109 }
3110
3111 /* For a match in multiline inverted mode (which of course did not cause
3112 anything to be printed), we have to move on to the end of the match before
3113 proceeding. */
3114
3115 if (multiline && invert && match)
3116 {
3117 int ellength;
3118 char *endmatch = ptr + offsets[1];
3119 t = ptr;
3120 while (t < endmatch)
3121 {
3122 t = end_of_line(t, endptr, &ellength);
3123 if (t <= endmatch) linenumber++; else break;
3124 }
3125 endmatch = end_of_line(endmatch, endptr, &ellength);
3126 linelength = endmatch - ptr - ellength;
3127 }
3128
3129 /* Advance to after the newline and increment the line number. The file
3130 offset to the current line is maintained in filepos. */
3131
3132 END_ONE_MATCH:
3133 ptr += linelength + endlinelength;
3134 filepos += (int)(linelength + endlinelength);
3135 linenumber++;
3136
3137 /* If there was at least one match (or a non-match, as required) in the line,
3138 increment the count for the -m option. */
3139
3140 if (line_matched) count_matched_lines++;
3141
3142 /* If input is line buffered, and the buffer is not yet full, read another
3143 line and add it into the buffer. */
3144
3145 if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
3146 {
3147 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
3148 bufflength += add;
3149 endptr += add;
3150 }
3151
3152 /* If we haven't yet reached the end of the file (the buffer is full), and
3153 the current point is in the top 1/3 of the buffer, slide the buffer down by
3154 1/3 and refill it. Before we do this, if some unprinted "after" lines are
3155 about to be lost, print them. */
3156
3157 if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3158 {
3159 if (after_context > 0 &&
3160 lastmatchnumber > 0 &&
3161 lastmatchrestart < main_buffer + bufthird)
3162 {
3163 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3164 lastmatchnumber = 0; /* Indicates no after lines pending */
3165 }
3166
3167 /* Now do the shuffle */
3168
3169 (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3170 ptr -= bufthird;
3171
3172 bufflength = 2*bufthird + fill_buffer(handle, frtype,
3173 main_buffer + 2*bufthird, bufthird, input_line_buffered);
3174 endptr = main_buffer + bufflength;
3175
3176 /* Adjust any last match point */
3177
3178 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3179 }
3180 } /* Loop through the whole file */
3181
3182/* End of file; print final "after" lines if wanted; do_after_lines sets
3183hyphenpending if it prints something. */
3184
3185if (only_matching_count == 0 && !(count_only|show_total_count))
3186 {
3187 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3188 hyphenpending |= endhyphenpending;
3189 }
3190
3191/* Print the file name if we are looking for those without matches and there
3192were none. If we found a match, we won't have got this far. */
3193
3194if (filenames == FN_NOMATCH_ONLY)
3195 {
3196 fprintf(stdout, "%s" STDOUT_NL, printname);
3197 return 0;
3198 }
3199
3200/* Print the match count if wanted */
3201
3202if (count_only && !quiet)
3203 {
3204 if (count > 0 || !omit_zero_count)
3205 {
3206 if (printname != NULL && filenames != FN_NONE)
3207 fprintf(stdout, "%s:", printname);
3208 fprintf(stdout, "%lu" STDOUT_NL, count);
3209 counts_printed++;
3210 }
3211 }
3212
3213total_count += count; /* Can be set without count_only */
3214return rc;
3215}
3216
3217
3218
3219/*************************************************
3220* Grep a file or recurse into a directory *
3221*************************************************/
3222
3223/* Given a path name, if it's a directory, scan all the files if we are
3224recursing; if it's a file, grep it.
3225
3226Arguments:
3227 pathname the path to investigate
3228 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
3229 only_one_at_top TRUE if the path is the only one at toplevel
3230
3231Returns: -1 the file/directory was skipped
3232 0 if there was at least one match
3233 1 if there were no matches
3234 2 there was some kind of error
3235
3236However, file opening failures are suppressed if "silent" is set.
3237*/
3238
3239static int
3240grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3241{
3242int rc = 1;
3243int frtype;
3244void *handle;
3245char *lastcomp;
3246FILE *in = NULL; /* Ensure initialized */
3247
3248#ifdef SUPPORT_LIBZ
3249gzFile ingz = NULL;
3250#endif
3251
3252#ifdef SUPPORT_LIBBZ2
3253BZFILE *inbz2 = NULL;
3254#endif
3255
3256#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3257int pathlen;
3258#endif
3259
3260#if defined NATIVE_ZOS
3261int zos_type;
3262FILE *zos_test_file;
3263#endif
3264
3265/* If the file name is "-" we scan stdin */
3266
3267if (strcmp(pathname, "-") == 0)
3268 {
3269 return pcre2grep(stdin, FR_PLAIN, stdin_name,
3270 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3271 stdin_name : NULL);
3272 }
3273
3274/* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3275directories, whereas --include and --exclude apply to everything else. The test
3276is against the final component of the path. */
3277
3278lastcomp = strrchr(pathname, FILESEP);
3279lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3280
3281/* If the file is a directory, skip if not recursing or if explicitly excluded.
3282Otherwise, scan the directory and recurse for each path within it. The scanning
3283code is localized so it can be made system-specific. */
3284
3285
3286/* For z/OS, determine the file type. */
3287
3288#if defined NATIVE_ZOS
3289zos_test_file = fopen(pathname,"rb");
3290
3291if (zos_test_file == NULL)
3292 {
3293 if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3294 pathname, strerror(errno));
3295 return -1;
3296 }
3297zos_type = identifyzosfiletype (zos_test_file);
3298fclose (zos_test_file);
3299
3300/* Handle a PDS in separate code */
3301
3302if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3303 {
3304 return travelonpdsdir (pathname, only_one_at_top);
3305 }
3306
3307/* Deal with regular files in the normal way below. These types are:
3308 zos_type == __ZOS_PDS_MEMBER
3309 zos_type == __ZOS_PS
3310 zos_type == __ZOS_VSAM_KSDS
3311 zos_type == __ZOS_VSAM_ESDS
3312 zos_type == __ZOS_VSAM_RRDS
3313*/
3314
3315/* Handle a z/OS directory using common code. */
3316
3317else if (zos_type == __ZOS_HFS)
3318 {
3319#endif /* NATIVE_ZOS */
3320
3321
3322/* Handle directories: common code for all OS */
3323
3324if (isdirectory(pathname))
3325 {
3326 if (dee_action == dee_SKIP ||
3327 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3328 return -1;
3329
3330 if (dee_action == dee_RECURSE)
3331 {
3332 char childpath[FNBUFSIZ];
3333 char *nextfile;
3334 directory_type *dir = opendirectory(pathname);
3335
3336 if (dir == NULL)
3337 {
3338 if (!silent)
3339 fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3340 strerror(errno));
3341 return 2;
3342 }
3343
3344 while ((nextfile = readdirectory(dir)) != NULL)
3345 {
3346 int frc;
3347 int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3348 if (fnlength > FNBUFSIZ)
3349 {
3350 fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3351 rc = 2;
3352 break;
3353 }
3354 sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile);
3355
3356 /* If the realpath() function is available, we can try to prevent endless
3357 recursion caused by a symlink pointing to a parent directory (GitHub
3358 issue #2 (old Bugzilla #2794). Original patch from Thomas Tempelmann.
3359 Modified to avoid using strlcat() because that isn't a standard C
3360 function, and also modified not to copy back the fully resolved path,
3361 because that affects the output from pcre2grep. */
3362
3363#ifdef HAVE_REALPATH
3364 char resolvedpath[PATH_MAX];
3365 if (realpath(childpath, resolvedpath) == NULL)
3366 continue; /* This path is invalid - we can skip processing this */
3367 BOOL isSame = strcmp(pathname, resolvedpath) == 0;
3368 if (isSame) continue; /* We have a recursion */
3369 size_t rlen = strlen(resolvedpath);
3370 if (rlen++ < sizeof(resolvedpath) - 3)
3371 {
3372 strcat(resolvedpath, "/");
3373 BOOL contained = strncmp(pathname, resolvedpath, rlen) == 0;
3374 if (contained) continue; /* We have a recursion */
3375 }
3376#endif /* HAVE_REALPATH */
3377
3378 frc = grep_or_recurse(childpath, dir_recurse, FALSE);
3379 if (frc > 1) rc = frc;
3380 else if (frc == 0 && rc == 1) rc = 0;
3381 }
3382
3383 closedirectory(dir);
3384 return rc;
3385 }
3386 }
3387
3388#ifdef WIN32
3389if (iswild(pathname))
3390 {
3391 char buffer[1024];
3392 char *nextfile;
3393 char *name;
3394 directory_type *dir = opendirectory(pathname);
3395
3396 if (dir == NULL)
3397 return 0;
3398
3399 for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3400 if (*nextfile == '/' || *nextfile == '\\')
3401 name = nextfile + 1;
3402 *name = 0;
3403
3404 while ((nextfile = readdirectory(dir)) != NULL)
3405 {
3406 int frc;
3407 sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3408 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3409 if (frc > 1) rc = frc;
3410 else if (frc == 0 && rc == 1) rc = 0;
3411 }
3412
3413 closedirectory(dir);
3414 return rc;
3415 }
3416#endif
3417
3418#if defined NATIVE_ZOS
3419 }
3420#endif
3421
3422/* If the file is not a directory, check for a regular file, and if it is not,
3423skip it if that's been requested. Otherwise, check for an explicit inclusion or
3424exclusion. */
3425
3426else if (
3427#if defined NATIVE_ZOS
3428 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3429#else /* all other OS */
3430 (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3431#endif
3432 !test_incexc(lastcomp, include_patterns, exclude_patterns))
3433 return -1; /* File skipped */
3434
3435/* Control reaches here if we have a regular file, or if we have a directory
3436and recursion or skipping was not requested, or if we have anything else and
3437skipping was not requested. The scan proceeds. If this is the first and only
3438argument at top level, we don't show the file name, unless we are only showing
3439the file name, or the filename was forced (-H). */
3440
3441#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3442pathlen = (int)(strlen(pathname));
3443#endif
3444
3445/* Open using zlib if it is supported and the file name ends with .gz. */
3446
3447#ifdef SUPPORT_LIBZ
3448if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3449 {
3450 ingz = gzopen(pathname, "rb");
3451 if (ingz == NULL)
3452 {
3453 if (!silent)
3454 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3455 strerror(errno));
3456 return 2;
3457 }
3458 handle = (void *)ingz;
3459 frtype = FR_LIBZ;
3460 }
3461else
3462#endif
3463
3464/* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3465
3466#ifdef SUPPORT_LIBBZ2
3467if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3468 {
3469 inbz2 = BZ2_bzopen(pathname, "rb");
3470 handle = (void *)inbz2;
3471 frtype = FR_LIBBZ2;
3472 }
3473else
3474#endif
3475
3476/* Otherwise use plain fopen(). The label is so that we can come back here if
3477an attempt to read a .bz2 file indicates that it really is a plain file. */
3478
3479#ifdef SUPPORT_LIBBZ2
3480PLAIN_FILE:
3481#endif
3482 {
3483 in = fopen(pathname, "rb");
3484 handle = (void *)in;
3485 frtype = FR_PLAIN;
3486 }
3487
3488/* All the opening methods return errno when they fail. */
3489
3490if (handle == NULL)
3491 {
3492 if (!silent)
3493 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3494 strerror(errno));
3495 return 2;
3496 }
3497
3498/* Now grep the file */
3499
3500rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3501 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3502
3503/* Close in an appropriate manner. */
3504
3505#ifdef SUPPORT_LIBZ
3506if (frtype == FR_LIBZ)
3507 gzclose(ingz);
3508else
3509#endif
3510
3511/* If it is a .bz2 file and the result is 3, it means that the first attempt to
3512read failed. If the error indicates that the file isn't in fact bzipped, try
3513again as a normal file. */
3514
3515#ifdef SUPPORT_LIBBZ2
3516if (frtype == FR_LIBBZ2)
3517 {
3518 if (rc == 3)
3519 {
3520 int errnum;
3521 const char *err = BZ2_bzerror(inbz2, &errnum);
3522 if (errnum == BZ_DATA_ERROR_MAGIC)
3523 {
3524 BZ2_bzclose(inbz2);
3525 goto PLAIN_FILE;
3526 }
3527 else if (!silent)
3528 fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3529 pathname, err);
3530 rc = 2; /* The normal "something went wrong" code */
3531 }
3532 BZ2_bzclose(inbz2);
3533 }
3534else
3535#endif
3536
3537/* Normal file close */
3538
3539fclose(in);
3540
3541/* Pass back the yield from pcre2grep(). */
3542
3543return rc;
3544}
3545
3546
3547
3548/*************************************************
3549* Handle a no-data option *
3550*************************************************/
3551
3552static int
3553handle_option(int letter, int options)
3554{
3555switch(letter)
3556 {
3557 case N_FOFFSETS: file_offsets = TRUE; break;
3558 case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3559 case N_LBUFFER: line_buffered = TRUE; break;
3560 case N_LOFFSETS: line_offsets = number = TRUE; break;
3561 case N_NOJIT: use_jit = FALSE; break;
3562 case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break;
3563 case 'a': binary_files = BIN_TEXT; break;
3564 case 'c': count_only = TRUE; break;
3565 case 'F': options |= PCRE2_LITERAL; break;
3566 case 'H': filenames = FN_FORCE; break;
3567 case 'I': binary_files = BIN_NOMATCH; break;
3568 case 'h': filenames = FN_NONE; break;
3569 case 'i': options |= PCRE2_CASELESS; break;
3570 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3571 case 'L': filenames = FN_NOMATCH_ONLY; break;
3572 case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3573 case 'n': number = TRUE; break;
3574
3575 case 'o':
3576 only_matching_last = add_number(0, only_matching_last);
3577 if (only_matching == NULL) only_matching = only_matching_last;
3578 break;
3579
3580 case 'q': quiet = TRUE; break;
3581 case 'r': dee_action = dee_RECURSE; break;
3582 case 's': silent = TRUE; break;
3583 case 't': show_total_count = TRUE; break;
3584 case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3585 case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
3586 case 'v': invert = TRUE; break;
3587 case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3588 case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3589
3590 case 'V':
3591 {
3592 unsigned char buffer[128];
3593 (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3594 fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3595 }
3596 pcre2grep_exit(0);
3597 break;
3598
3599 default:
3600 fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3601 pcre2grep_exit(usage(2));
3602 }
3603
3604return options;
3605}
3606
3607
3608
3609/*************************************************
3610* Construct printed ordinal *
3611*************************************************/
3612
/* This turns a number into its printed ordinal form, for example "1st", "2nd",
"3rd", "4th", or "11th". The yield points to a static buffer, which is
overwritten by the next call.

Argument:   n    the number
Returns:    pointer to the text, in static storage
*/

static char *
ordin(int n)
{
static char buffer[14];
const char *suffix = "th";
int digits = sprintf(buffer, "%d", n);
int lasttwo = n % 100;

/* Numbers ending in 11, 12, or 13 take "th"; for the others the suffix depends
on the final digit. */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (lasttwo % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

strcpy(buffer + digits, suffix);
return buffer;
}
3633
3634
3635
3636/*************************************************
3637* Compile a single pattern *
3638*************************************************/
3639
3640/* Do nothing if the pattern has already been compiled. This is the case for
3641include/exclude patterns read from a file.
3642
3643When the -F option has been used, each "pattern" may be a list of strings,
3644separated by line breaks. They will be matched literally. We split such a
3645string and compile the first substring, inserting an additional block into the
3646pattern chain.
3647
3648Arguments:
3649 p points to the pattern block
3650 options the PCRE options
3651 fromfile TRUE if the pattern was read from a file
3652 fromtext file name or identifying text (e.g. "include")
3653 count 0 if this is the only command line pattern, or
3654 number of the command line pattern, or
3655 linenumber for a pattern from a file
3656
3657Returns: TRUE on success, FALSE after an error
3658*/
3659
3660static BOOL
3661compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3662 int count)
3663{
3664char *ps;
3665int errcode;
3666PCRE2_SIZE patlen, erroffset;
3667PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3668
3669if (p->compiled != NULL) return TRUE;
3670ps = p->string;
3671patlen = p->length;
3672
3673if ((options & PCRE2_LITERAL) != 0)
3674 {
3675 int ellength;
3676 char *eop = ps + patlen;
3677 char *pe = end_of_line(ps, eop, &ellength);
3678
3679 if (ellength != 0)
3680 {
3681 patlen = pe - ps - ellength;
3682 if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3683 }
3684 }
3685
3686p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3687 &erroffset, compile_context);
3688
3689/* Handle successful compile. Try JIT-compiling if supported and enabled. We
3690ignore any JIT compiler errors, relying falling back to interpreting if
3691anything goes wrong with JIT. */
3692
3693if (p->compiled != NULL)
3694 {
3695#ifdef SUPPORT_PCRE2GREP_JIT
3696 if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3697#endif
3698 return TRUE;
3699 }
3700
3701/* Handle compile errors */
3702
3703if (erroffset > patlen) erroffset = patlen;
3704pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3705
3706if (fromfile)
3707 {
3708 fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3709 "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3710 }
3711else
3712 {
3713 if (count == 0)
3714 fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3715 fromtext, (int)erroffset, errmessbuffer);
3716 else
3717 fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3718 ordin(count), fromtext, (int)erroffset, errmessbuffer);
3719 }
3720
3721return FALSE;
3722}
3723
3724
3725
3726/*************************************************
3727* Read and compile a file of patterns *
3728*************************************************/
3729
3730/* This is used for --filelist, --include-from, and --exclude-from.
3731
3732Arguments:
3733 name the name of the file; "-" is stdin
3734 patptr pointer to the pattern chain anchor
3735 patlastptr pointer to the last pattern pointer
3736
3737Returns: TRUE if all went well
3738*/
3739
3740static BOOL
3741read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3742{
3743int linenumber = 0;
3744PCRE2_SIZE patlen;
3745FILE *f;
3746const char *filename;
3747char buffer[MAXPATLEN+20];
3748
3749if (strcmp(name, "-") == 0)
3750 {
3751 f = stdin;
3752 filename = stdin_name;
3753 }
3754else
3755 {
3756 f = fopen(name, "r");
3757 if (f == NULL)
3758 {
3759 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3760 return FALSE;
3761 }
3762 filename = name;
3763 }
3764
3765while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3766 {
3767 while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3768 linenumber++;
3769 if (patlen == 0) continue; /* Skip blank lines */
3770
3771 /* Note: this call to add_pattern() puts a pointer to the local variable
3772 "buffer" into the pattern chain. However, that pointer is used only when
3773 compiling the pattern, which happens immediately below, so we flatten it
3774 afterwards, as a precaution against any later code trying to use it. */
3775
3776 *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3777 if (*patlastptr == NULL)
3778 {
3779 if (f != stdin) fclose(f);
3780 return FALSE;
3781 }
3782 if (*patptr == NULL) *patptr = *patlastptr;
3783
3784 /* This loop is needed because compiling a "pattern" when -F is set may add
3785 on additional literal patterns if the original contains a newline. In the
3786 common case, it never will, because read_one_line() stops at a newline.
3787 However, the -N option can be used to give pcre2grep a different newline
3788 setting. */
3789
3790 for(;;)
3791 {
3792 if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3793 linenumber))
3794 {
3795 if (f != stdin) fclose(f);
3796 return FALSE;
3797 }
3798 (*patlastptr)->string = NULL; /* Insurance */
3799 if ((*patlastptr)->next == NULL) break;
3800 *patlastptr = (*patlastptr)->next;
3801 }
3802 }
3803
3804if (f != stdin) fclose(f);
3805return TRUE;
3806}
3807
3808
3809
3810/*************************************************
3811* Main program *
3812*************************************************/
3813
3814/* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3815
3816int
3817main(int argc, char **argv)
3818{
3819int i, j;
3820int rc = 1;
3821BOOL only_one_at_top;
3822patstr *cp;
3823fnstr *fn;
3824omstr *om;
3825const char *locale_from = "--locale";
3826
3827#ifdef SUPPORT_PCRE2GREP_JIT
3828pcre2_jit_stack *jit_stack = NULL;
3829#endif
3830
3831/* In Windows, stdout is set up as a text stream, which means that \n is
3832converted to \r\n. This causes output lines that are copied from the input to
3833change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3834that stdout is a binary stream. Note that this means all other output to stdout
3835must use STDOUT_NL to terminate lines. */
3836
3837#ifdef WIN32
3838_setmode(_fileno(stdout), _O_BINARY);
3839#endif
3840
3841/* Process the options */
3842
3843for (i = 1; i < argc; i++)
3844 {
3845 option_item *op = NULL;
3846 char *option_data = (char *)""; /* default to keep compiler happy */
3847 BOOL longop;
3848 BOOL longopwasequals = FALSE;
3849
3850 if (argv[i][0] != '-') break;
3851
3852 /* If we hit an argument that is just "-", it may be a reference to STDIN,
3853 but only if we have previously had -e or -f to define the patterns. */
3854
3855 if (argv[i][1] == 0)
3856 {
3857 if (pattern_files != NULL || patterns != NULL) break;
3858 else pcre2grep_exit(usage(2));
3859 }
3860
3861 /* Handle a long name option, or -- to terminate the options */
3862
3863 if (argv[i][1] == '-')
3864 {
3865 char *arg = argv[i] + 2;
3866 char *argequals = strchr(arg, '=');
3867
3868 if (*arg == 0) /* -- terminates options */
3869 {
3870 i++;
3871 break; /* out of the options-handling loop */
3872 }
3873
3874 longop = TRUE;
3875
3876 /* Some long options have data that follows after =, for example file=name.
3877 Some options have variations in the long name spelling: specifically, we
3878 allow "regexp" because GNU grep allows it, though I personally go along
3879 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3880 These options are entered in the table as "regex(p)". Options can be in
3881 both these categories. */
3882
3883 for (op = optionlist; op->one_char != 0; op++)
3884 {
3885 char *opbra = strchr(op->long_name, '(');
3886 char *equals = strchr(op->long_name, '=');
3887
3888 /* Handle options with only one spelling of the name */
3889
3890 if (opbra == NULL) /* Does not contain '(' */
3891 {
3892 if (equals == NULL) /* Not thing=data case */
3893 {
3894 if (strcmp(arg, op->long_name) == 0) break;
3895 }
3896 else /* Special case xxx=data */
3897 {
3898 int oplen = (int)(equals - op->long_name);
3899 int arglen = (argequals == NULL)?
3900 (int)strlen(arg) : (int)(argequals - arg);
3901 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3902 {
3903 option_data = arg + arglen;
3904 if (*option_data == '=')
3905 {
3906 option_data++;
3907 longopwasequals = TRUE;
3908 }
3909 break;
3910 }
3911 }
3912 }
3913
3914 /* Handle options with an alternate spelling of the name */
3915
3916 else
3917 {
3918 char buff1[24];
3919 char buff2[24];
3920 int ret;
3921
3922 int baselen = (int)(opbra - op->long_name);
3923 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3924 int arglen = (argequals == NULL || equals == NULL)?
3925 (int)strlen(arg) : (int)(argequals - arg);
3926
3927 if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3928 ret < 0 || ret > (int)sizeof(buff1)) ||
3929 (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3930 fulllen - baselen - 2, opbra + 1),
3931 ret < 0 || ret > (int)sizeof(buff2)))
3932 {
3933 fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3934 op->long_name);
3935 pcre2grep_exit(2);
3936 }
3937
3938 if (strncmp(arg, buff1, arglen) == 0 ||
3939 strncmp(arg, buff2, arglen) == 0)
3940 {
3941 if (equals != NULL && argequals != NULL)
3942 {
3943 option_data = argequals;
3944 if (*option_data == '=')
3945 {
3946 option_data++;
3947 longopwasequals = TRUE;
3948 }
3949 }
3950 break;
3951 }
3952 }
3953 }
3954
3955 if (op->one_char == 0)
3956 {
3957 fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3958 pcre2grep_exit(usage(2));
3959 }
3960 }
3961
3962 /* Jeffrey Friedl's debugging harness uses these additional options which
3963 are not in the right form for putting in the option table because they use
3964 only one hyphen, yet are more than one character long. By putting them
3965 separately here, they will not get displayed as part of the help() output,
3966 but I don't think Jeffrey will care about that. */
3967
3968#ifdef JFRIEDL_DEBUG
3969 else if (strcmp(argv[i], "-pre") == 0) {
3970 jfriedl_prefix = argv[++i];
3971 continue;
3972 } else if (strcmp(argv[i], "-post") == 0) {
3973 jfriedl_postfix = argv[++i];
3974 continue;
3975 } else if (strcmp(argv[i], "-XT") == 0) {
3976 sscanf(argv[++i], "%d", &jfriedl_XT);
3977 continue;
3978 } else if (strcmp(argv[i], "-XR") == 0) {
3979 sscanf(argv[++i], "%d", &jfriedl_XR);
3980 continue;
3981 }
3982#endif
3983
3984
3985 /* One-char options; many that have no data may be in a single argument; we
3986 continue till we hit the last one or one that needs data. */
3987
3988 else
3989 {
3990 char *s = argv[i] + 1;
3991 longop = FALSE;
3992
3993 while (*s != 0)
3994 {
3995 for (op = optionlist; op->one_char != 0; op++)
3996 {
3997 if (*s == op->one_char) break;
3998 }
3999 if (op->one_char == 0)
4000 {
4001 fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
4002 *s, argv[i]);
4003 pcre2grep_exit(usage(2));
4004 }
4005
4006 option_data = s+1;
4007
4008 /* Break out if this is the last character in the string; it's handled
4009 below like a single multi-char option. */
4010
4011 if (*option_data == 0) break;
4012
4013 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
4014 are used for ones that either have a numerical number or defaults, i.e.
4015 the data is optional. If a digit follows, there is data; if not, carry on
4016 with other single-character options in the same string. */
4017
4018 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
4019 {
4020 if (isdigit((unsigned char)s[1])) break;
4021 }
4022 else /* Check for an option with data */
4023 {
4024 if (op->type != OP_NODATA) break;
4025 }
4026
4027 /* Handle a single-character option with no data, then loop for the
4028 next character in the string. */
4029
4030 pcre2_options = handle_option(*s++, pcre2_options);
4031 }
4032 }
4033
4034 /* At this point we should have op pointing to a matched option. If the type
4035 is NO_DATA, it means that there is no data, and the option might set
4036 something in the PCRE options. */
4037
4038 if (op->type == OP_NODATA)
4039 {
4040 pcre2_options = handle_option(op->one_char, pcre2_options);
4041 continue;
4042 }
4043
4044 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
4045 either has a value or defaults to something. It cannot have data in a
4046 separate item. At the moment, the only such options are "colo(u)r",
4047 "only-matching", and Jeffrey Friedl's special -S debugging option. */
4048
4049 if (*option_data == 0 &&
4050 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
4051 op->type == OP_OP_NUMBERS))
4052 {
4053 switch (op->one_char)
4054 {
4055 case N_COLOUR:
4056 colour_option = "auto";
4057 break;
4058
4059 case 'o':
4060 only_matching_last = add_number(0, only_matching_last);
4061 if (only_matching == NULL) only_matching = only_matching_last;
4062 break;
4063
4064#ifdef JFRIEDL_DEBUG
4065 case 'S':
4066 S_arg = 0;
4067 break;
4068#endif
4069 }
4070 continue;
4071 }
4072
4073 /* Otherwise, find the data string for the option. */
4074
4075 if (*option_data == 0)
4076 {
4077 if (i >= argc - 1 || longopwasequals)
4078 {
4079 fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
4080 pcre2grep_exit(usage(2));
4081 }
4082 option_data = argv[++i];
4083 }
4084
4085 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
4086 added to a chain of numbers. */
4087
4088 if (op->type == OP_OP_NUMBERS)
4089 {
4090 unsigned long int n = decode_number(option_data, op, longop);
4091 omdatastr *omd = (omdatastr *)op->dataptr;
4092 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
4093 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
4094 }
4095
4096 /* If the option type is OP_PATLIST, it's the -e option, or one of the
4097 include/exclude options, which can be called multiple times to create lists
4098 of patterns. */
4099
4100 else if (op->type == OP_PATLIST)
4101 {
4102 patdatastr *pd = (patdatastr *)op->dataptr;
4103 *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
4104 *(pd->lastptr));
4105 if (*(pd->lastptr) == NULL) goto EXIT2;
4106 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
4107 }
4108
4109 /* If the option type is OP_FILELIST, it's one of the options that names a
4110 file. */
4111
4112 else if (op->type == OP_FILELIST)
4113 {
4114 fndatastr *fd = (fndatastr *)op->dataptr;
4115 fn = (fnstr *)malloc(sizeof(fnstr));
4116 if (fn == NULL)
4117 {
4118 fprintf(stderr, "pcre2grep: malloc failed\n");
4119 goto EXIT2;
4120 }
4121 fn->next = NULL;
4122 fn->name = option_data;
4123 if (*(fd->anchor) == NULL)
4124 *(fd->anchor) = fn;
4125 else
4126 (*(fd->lastptr))->next = fn;
4127 *(fd->lastptr) = fn;
4128 }
4129
4130 /* Handle OP_BINARY_FILES */
4131
4132 else if (op->type == OP_BINFILES)
4133 {
4134 if (strcmp(option_data, "binary") == 0)
4135 binary_files = BIN_BINARY;
4136 else if (strcmp(option_data, "without-match") == 0)
4137 binary_files = BIN_NOMATCH;
4138 else if (strcmp(option_data, "text") == 0)
4139 binary_files = BIN_TEXT;
4140 else
4141 {
4142 fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
4143 option_data);
4144 pcre2grep_exit(usage(2));
4145 }
4146 }
4147
4148 /* Otherwise, deal with a single string or numeric data value. */
4149
4150 else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
4151 op->type != OP_OP_NUMBER && op->type != OP_SIZE)
4152 {
4153 *((char **)op->dataptr) = option_data;
4154 }
4155 else
4156 {
4157 unsigned long int n = decode_number(option_data, op, longop);
4158 if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
4159 else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
4160 else *((int *)op->dataptr) = n;
4161 }
4162 }
4163
4164/* Options have been decoded. If -C was used, its value is used as a default
4165for -A and -B. */
4166
4167if (both_context > 0)
4168 {
4169 if (after_context == 0) after_context = both_context;
4170 if (before_context == 0) before_context = both_context;
4171 }
4172
4173/* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4174permitted. They display, each in their own way, only the data that has matched.
4175*/
4176
4177only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4178 file_offsets + line_offsets;
4179
4180if (only_matching_count > 1)
4181 {
4182 fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4183 "--file-offsets and/or --line-offsets\n");
4184 pcre2grep_exit(usage(2));
4185 }
4186
4187
4188/* Check that there is a big enough ovector for all -o settings. */
4189
4190for (om = only_matching; om != NULL; om = om->next)
4191 {
4192 int n = om->groupnum;
4193 if (n > (int)capture_max)
4194 {
4195 fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
4196 fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
4197 goto EXIT2;
4198 }
4199 }
4200
4201/* Check the text supplied to --output for errors. */
4202
4203if (output_text != NULL &&
4204 !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4205 goto EXIT2;
4206
4207/* Set up default compile and match contexts and a match data block. */
4208
4209offset_size = capture_max + 1;
4210compile_context = pcre2_compile_context_create(NULL);
4211match_context = pcre2_match_context_create(NULL);
4212match_data = pcre2_match_data_create(offset_size, NULL);
4213offsets = pcre2_get_ovector_pointer(match_data);
4214
4215/* If string (script) callouts are supported, set up the callout processing
4216function. */
4217
4218#ifdef SUPPORT_PCRE2GREP_CALLOUT
4219pcre2_set_callout(match_context, pcre2grep_callout, NULL);
4220#endif
4221
4222/* Put limits into the match data block. */
4223
4224if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4225if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4226if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4227
4228/* If a locale has not been provided as an option, see if the LC_CTYPE or
4229LC_ALL environment variable is set, and if so, use it. */
4230
4231if (locale == NULL)
4232 {
4233 locale = getenv("LC_ALL");
4234 locale_from = "LC_ALL";
4235 }
4236
4237if (locale == NULL)
4238 {
4239 locale = getenv("LC_CTYPE");
4240 locale_from = "LC_CTYPE";
4241 }
4242
4243/* If a locale is set, use it to generate the tables the PCRE needs. Passing
4244NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4245
4246if (locale != NULL)
4247 {
4248 if (setlocale(LC_CTYPE, locale) == NULL)
4249 {
4250 fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4251 locale, locale_from);
4252 goto EXIT2;
4253 }
4254 character_tables = pcre2_maketables(NULL);
4255 pcre2_set_character_tables(compile_context, character_tables);
4256 }
4257
4258/* Sort out colouring */
4259
4260if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4261 {
4262 if (strcmp(colour_option, "always") == 0)
4263#ifdef WIN32
4264 do_ansi = !is_stdout_tty(),
4265#endif
4266 do_colour = TRUE;
4267 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4268 else
4269 {
4270 fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4271 colour_option);
4272 goto EXIT2;
4273 }
4274 if (do_colour)
4275 {
4276 char *cs = getenv("PCRE2GREP_COLOUR");
4277 if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4278 if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4279 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4280 if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4281 if (cs == NULL) cs = getenv("GREP_COLOR");
4282 if (cs != NULL)
4283 {
4284 if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4285 }
4286#ifdef WIN32
4287 init_colour_output();
4288#endif
4289 }
4290 }
4291
4292/* Sort out a newline setting. */
4293
4294if (newline_arg != NULL)
4295 {
4296 for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4297 endlinetype++)
4298 {
4299 if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4300 }
4301 if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4302 pcre2_set_newline(compile_context, endlinetype);
4303 else
4304 {
4305 fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4306 newline_arg);
4307 goto EXIT2;
4308 }
4309 }
4310
4311/* Find default newline convention */
4312
4313else
4314 {
4315 (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4316 }
4317
4318/* Interpret the text values for -d and -D */
4319
4320if (dee_option != NULL)
4321 {
4322 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4323 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4324 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4325 else
4326 {
4327 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4328 goto EXIT2;
4329 }
4330 }
4331
4332if (DEE_option != NULL)
4333 {
4334 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4335 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4336 else
4337 {
4338 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4339 goto EXIT2;
4340 }
4341 }
4342
4343/* Set the extra options */
4344
4345(void)pcre2_set_compile_extra_options(compile_context, extra_options);
4346
4347/* Check the values for Jeffrey Friedl's debugging options. */
4348
4349#ifdef JFRIEDL_DEBUG
4350if (S_arg > 9)
4351 {
4352 fprintf(stderr, "pcre2grep: bad value for -S option\n");
4353 return 2;
4354 }
4355if (jfriedl_XT != 0 || jfriedl_XR != 0)
4356 {
4357 if (jfriedl_XT == 0) jfriedl_XT = 1;
4358 if (jfriedl_XR == 0) jfriedl_XR = 1;
4359 }
4360#endif
4361
4362/* If use_jit is set, check whether JIT is available. If not, do not try
4363to use JIT. */
4364
4365if (use_jit)
4366 {
4367 uint32_t answer;
4368 (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4369 if (!answer) use_jit = FALSE;
4370 }
4371
4372/* Get memory for the main buffer. */
4373
4374if (bufthird <= 0)
4375 {
4376 fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4377 goto EXIT2;
4378 }
4379
4380bufsize = 3*bufthird;
4381main_buffer = (char *)malloc(bufsize);
4382
4383if (main_buffer == NULL)
4384 {
4385 fprintf(stderr, "pcre2grep: malloc failed\n");
4386 goto EXIT2;
4387 }
4388
4389/* If no patterns were provided by -e, and there are no files provided by -f,
4390the first argument is the one and only pattern, and it must exist. */
4391
4392if (patterns == NULL && pattern_files == NULL)
4393 {
4394 if (i >= argc) return usage(2);
4395 patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4396 NULL);
4397 i++;
4398 if (patterns == NULL) goto EXIT2;
4399 }
4400
4401/* Compile the patterns that were provided on the command line, either by
4402multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4403after all the command-line options are read so that we know which PCRE options
4404to use. When -F is used, compile_pattern() may add another block into the
4405chain, so we must not access the next pointer till after the compile. */
4406
4407for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4408 {
4409 if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4410 (j == 1 && patterns->next == NULL)? 0 : j))
4411 goto EXIT2;
4412 }
4413
4414/* Read and compile the regular expressions that are provided in files. */
4415
4416for (fn = pattern_files; fn != NULL; fn = fn->next)
4417 {
4418 if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4419 }
4420
4421/* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4422
4423#ifdef SUPPORT_PCRE2GREP_JIT
4424if (use_jit)
4425 {
4426 jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4427 if (jit_stack != NULL )
4428 pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4429 }
4430#endif
4431
4432/* -F, -w, and -x do not apply to include or exclude patterns, so we must
4433adjust the options. */
4434
4435pcre2_options &= ~PCRE2_LITERAL;
4436(void)pcre2_set_compile_extra_options(compile_context, 0);
4437
4438/* If there are include or exclude patterns read from the command line, compile
4439them. */
4440
4441for (j = 0; j < 4; j++)
4442 {
4443 int k;
4444 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4445 {
4446 if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4447 (k == 1 && cp->next == NULL)? 0 : k))
4448 goto EXIT2;
4449 }
4450 }
4451
4452/* Read and compile include/exclude patterns from files. */
4453
4454for (fn = include_from; fn != NULL; fn = fn->next)
4455 {
4456 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4457 goto EXIT2;
4458 }
4459
4460for (fn = exclude_from; fn != NULL; fn = fn->next)
4461 {
4462 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4463 goto EXIT2;
4464 }
4465
4466/* If there are no files that contain lists of files to search, and there are
4467no file arguments, search stdin, and then exit. */
4468
4469if (file_lists == NULL && i >= argc)
4470 {
4471 rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4472 (filenames > FN_DEFAULT)? stdin_name : NULL);
4473 goto EXIT;
4474 }
4475
4476/* If any files that contain lists of files to search have been specified,
4477read them line by line and search the given files. */
4478
4479for (fn = file_lists; fn != NULL; fn = fn->next)
4480 {
4481 char buffer[FNBUFSIZ];
4482 FILE *fl;
4483 if (strcmp(fn->name, "-") == 0) fl = stdin; else
4484 {
4485 fl = fopen(fn->name, "rb");
4486 if (fl == NULL)
4487 {
4488 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4489 strerror(errno));
4490 goto EXIT2;
4491 }
4492 }
4493 while (fgets(buffer, sizeof(buffer), fl) != NULL)
4494 {
4495 int frc;
4496 char *end = buffer + (int)strlen(buffer);
4497 while (end > buffer && isspace(end[-1])) end--;
4498 *end = 0;
4499 if (*buffer != 0)
4500 {
4501 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4502 if (frc > 1) rc = frc;
4503 else if (frc == 0 && rc == 1) rc = 0;
4504 }
4505 }
4506 if (fl != stdin) fclose(fl);
4507 }
4508
4509/* After handling file-list, work through remaining arguments. Pass in the fact
4510that there is only one argument at top level - this suppresses the file name if
4511the argument is not a directory and filenames are not otherwise forced. */
4512
4513only_one_at_top = i == argc - 1 && file_lists == NULL;
4514
4515for (; i < argc; i++)
4516 {
4517 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4518 only_one_at_top);
4519 if (frc > 1) rc = frc;
4520 else if (frc == 0 && rc == 1) rc = 0;
4521 }
4522
4523#ifdef SUPPORT_PCRE2GREP_CALLOUT
4524/* If separating builtin echo callouts by implicit newline, add one more for
4525the final item. */
4526
4527if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
4528 fprintf(stdout, STDOUT_NL);
4529#endif
4530
4531/* Show the total number of matches if requested, but not if only one file's
4532count was printed. */
4533
4534if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4535 {
4536 if (counts_printed != 0 && filenames >= FN_DEFAULT)
4537 fprintf(stdout, "TOTAL:");
4538 fprintf(stdout, "%lu" STDOUT_NL, total_count);
4539 }
4540
4541EXIT:
4542#ifdef SUPPORT_PCRE2GREP_JIT
4543pcre2_jit_free_unused_memory(NULL);
4544if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4545#endif
4546
4547free(main_buffer);
4548if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
4549
4550pcre2_compile_context_free(compile_context);
4551pcre2_match_context_free(match_context);
4552pcre2_match_data_free(match_data);
4553
4554free_pattern_chain(patterns);
4555free_pattern_chain(include_patterns);
4556free_pattern_chain(include_dir_patterns);
4557free_pattern_chain(exclude_patterns);
4558free_pattern_chain(exclude_dir_patterns);
4559
4560free_file_chain(exclude_from);
4561free_file_chain(include_from);
4562free_file_chain(pattern_files);
4563free_file_chain(file_lists);
4564
4565while (only_matching != NULL)
4566 {
4567 omstr *this = only_matching;
4568 only_matching = this->next;
4569 free(this);
4570 }
4571
4572pcre2grep_exit(rc);
4573
4574EXIT2:
4575rc = 2;
4576goto EXIT;
4577}
4578
4579/* End of pcre2grep */