blob: 2335d0ddb8862d9f133bac07ade67fc98d453b03 [file] [log] [blame]
Elliott Hughes5b808042021-10-01 10:56:10 -07001/*************************************************
2* pcre2grep program *
3*************************************************/
4
5/* This is a grep program that uses the 8-bit PCRE regular expression library
6via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7and native z/OS systems it can recurse into directories, and in z/OS it can
8handle PDS files.
9
10Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11additional header is required. That header is not included in the main PCRE2
12distribution because other apparatus is needed to compile pcre2grep for z/OS.
13The header can be found in the special z/OS distribution, which is available
14from www.zaconsultants.net or from www.cbttape.org.
15
16 Copyright (c) 1997-2020 University of Cambridge
17
18-----------------------------------------------------------------------------
19Redistribution and use in source and binary forms, with or without
20modification, are permitted provided that the following conditions are met:
21
22 * Redistributions of source code must retain the above copyright notice,
23 this list of conditions and the following disclaimer.
24
25 * Redistributions in binary form must reproduce the above copyright
26 notice, this list of conditions and the following disclaimer in the
27 documentation and/or other materials provided with the distribution.
28
29 * Neither the name of the University of Cambridge nor the names of its
30 contributors may be used to endorse or promote products derived from
31 this software without specific prior written permission.
32
33THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43POSSIBILITY OF SUCH DAMAGE.
44-----------------------------------------------------------------------------
45*/
46
47#ifdef HAVE_CONFIG_H
48#include "config.h"
49#endif
50
51#include <ctype.h>
52#include <locale.h>
53#include <stdio.h>
54#include <string.h>
55#include <stdlib.h>
56#include <errno.h>
57
58#include <sys/types.h>
59#include <sys/stat.h>
60
61#if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62 && !defined WIN32 && !defined(__CYGWIN__)
63#define WIN32
64#endif
65
/* Some versions of CMake still define WIN32 under Cygwin; undo that here. */
67#if defined(__CYGWIN__) && defined(WIN32)
68#undef WIN32
69#endif
70
71#ifdef __VMS
72#include clidef
73#include descrip
74#include lib$routines
75#endif
76
77#ifdef WIN32
78#include <io.h> /* For _setmode() */
79#include <fcntl.h> /* For _O_BINARY */
80#endif
81
82#if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83#ifdef WIN32
84#include <process.h>
85#else
86#include <sys/wait.h>
87#endif
88#endif
89
90#ifdef HAVE_UNISTD_H
91#include <unistd.h>
92#endif
93
94#ifdef SUPPORT_LIBZ
95#include <zlib.h>
96#endif
97
98#ifdef SUPPORT_LIBBZ2
99#include <bzlib.h>
100#endif
101
102#define PCRE2_CODE_UNIT_WIDTH 8
103#include "pcre2.h"
104
105/* Older versions of MSVC lack snprintf(). This define allows for
106warning/error-free compilation and testing with MSVC compilers back to at least
107MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108
109#if defined(_MSC_VER) && (_MSC_VER < 1900)
110#define snprintf _snprintf
111#endif
112
/* Old versions of MSVC and other older compilers do not support %td or %zu,
and even some compilers that claim to be C99 do not support them (hence
DISABLE_PERCENT_ZT). */
115
Elliott Hughes16619d62021-10-29 12:10:38 -0700116#if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
117 (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L))
118#ifdef _WIN64
119#define SIZ_FORM "llu"
Elliott Hughes5b808042021-10-01 10:56:10 -0700120#else
Elliott Hughes16619d62021-10-29 12:10:38 -0700121#define SIZ_FORM "lu"
122#endif
123#else
Elliott Hughes5b808042021-10-01 10:56:10 -0700124#define SIZ_FORM "zu"
Elliott Hughes5b808042021-10-01 10:56:10 -0700125#endif
126
127#define FALSE 0
128#define TRUE 1
129
130typedef int BOOL;
131
132#define DEFAULT_CAPTURE_MAX 50
133
134#if BUFSIZ > 8192
135#define MAXPATLEN BUFSIZ
136#else
137#define MAXPATLEN 8192
138#endif
139
140#define FNBUFSIZ 2048
141#define ERRBUFSIZ 256
142
143/* Values for the "filenames" variable, which specifies options for file name
144output. The order is important; it is assumed that a file name is wanted for
145all values greater than FN_DEFAULT. */
146
147enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
148
149/* File reading styles */
150
151enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
152
153/* Actions for the -d and -D options */
154
155enum { dee_READ, dee_SKIP, dee_RECURSE };
156enum { DEE_READ, DEE_SKIP };
157
158/* Actions for special processing options (flag bits) */
159
160#define PO_WORD_MATCH 0x0001
161#define PO_LINE_MATCH 0x0002
162#define PO_FIXED_STRINGS 0x0004
163
164/* Binary file options */
165
166enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
167
168/* Return values from decode_dollar_escape() */
169
170enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
171
172/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
173environments), a warning is issued if the value of fwrite() is ignored.
174Unfortunately, casting to (void) does not suppress the warning. To get round
175this, we use a macro that compiles a fudge. Oddly, this does not also seem to
176apply to fprintf(). */
177
178#define FWRITE_IGNORE(a,b,c,d) if (fwrite(a,b,c,d)) {}
179
180/* Under Windows, we have to set stdout to be binary, so that it does not
181convert \r\n at the ends of output lines to \r\r\n. However, that means that
182any messages written to stdout must have \r\n as their line terminator. This is
183handled by using STDOUT_NL as the newline string. We also use a normal double
184quote for the example, as single quotes aren't usually available. */
185
186#ifdef WIN32
187#define STDOUT_NL "\r\n"
188#define STDOUT_NL_LEN 2
189#define QUOT "\""
190#else
191#define STDOUT_NL "\n"
192#define STDOUT_NL_LEN 1
193#define QUOT "'"
194#endif
195
196/* This code is returned from decode_dollar_escape() when $n is encountered,
197and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
198point. */
199
200#define STDOUT_NL_CODE 0x7fffffffu
201
202
203
204/*************************************************
205* Global variables *
206*************************************************/
207
208/* Jeffrey Friedl has some debugging requirements that are not part of the
209regular code. */
210
Elliott Hughes5b808042021-10-01 10:56:10 -0700211static const char *colour_string = "1;31";
212static const char *colour_option = NULL;
213static const char *dee_option = NULL;
214static const char *DEE_option = NULL;
215static const char *locale = NULL;
216static const char *newline_arg = NULL;
217static const char *om_separator = NULL;
218static const char *stdin_name = "(standard input)";
219static const char *output_text = NULL;
220
221static char *main_buffer = NULL;
222
223static int after_context = 0;
224static int before_context = 0;
225static int binary_files = BIN_BINARY;
226static int both_context = 0;
227static int bufthird = PCRE2GREP_BUFSIZE;
228static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
229static int bufsize = 3*PCRE2GREP_BUFSIZE;
230static int endlinetype;
231
232static int count_limit = -1; /* Not long, so that it works with OP_NUMBER */
233static unsigned long int counts_printed = 0;
234static unsigned long int total_count = 0;
235
236#ifdef WIN32
237static int dee_action = dee_SKIP;
238#else
239static int dee_action = dee_READ;
240#endif
241
242static int DEE_action = DEE_READ;
243static int error_count = 0;
244static int filenames = FN_DEFAULT;
245
246#ifdef SUPPORT_PCRE2GREP_JIT
247static BOOL use_jit = TRUE;
248#else
249static BOOL use_jit = FALSE;
250#endif
251
252static const uint8_t *character_tables = NULL;
253
254static uint32_t pcre2_options = 0;
255static uint32_t extra_options = 0;
256static PCRE2_SIZE heap_limit = PCRE2_UNSET;
257static uint32_t match_limit = 0;
258static uint32_t depth_limit = 0;
259
260static pcre2_compile_context *compile_context;
261static pcre2_match_context *match_context;
262static pcre2_match_data *match_data;
263static PCRE2_SIZE *offsets;
264static uint32_t offset_size;
265static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
266
267static BOOL count_only = FALSE;
268static BOOL do_colour = FALSE;
269#ifdef WIN32
270static BOOL do_ansi = FALSE;
271#endif
272static BOOL file_offsets = FALSE;
273static BOOL hyphenpending = FALSE;
274static BOOL invert = FALSE;
275static BOOL line_buffered = FALSE;
276static BOOL line_offsets = FALSE;
277static BOOL multiline = FALSE;
278static BOOL number = FALSE;
279static BOOL omit_zero_count = FALSE;
280static BOOL resource_error = FALSE;
281static BOOL quiet = FALSE;
282static BOOL show_total_count = FALSE;
283static BOOL silent = FALSE;
284static BOOL utf = FALSE;
285
286static uint8_t utf8_buffer[8];
287
288
289/* Structure for list of --only-matching capturing numbers. */
290
291typedef struct omstr {
292 struct omstr *next;
293 int groupnum;
294} omstr;
295
296static omstr *only_matching = NULL;
297static omstr *only_matching_last = NULL;
298static int only_matching_count;
299
300/* Structure for holding the two variables that describe a number chain. */
301
302typedef struct omdatastr {
303 omstr **anchor;
304 omstr **lastptr;
305} omdatastr;
306
307static omdatastr only_matching_data = { &only_matching, &only_matching_last };
308
309/* Structure for list of file names (for -f and --{in,ex}clude-from) */
310
311typedef struct fnstr {
312 struct fnstr *next;
313 char *name;
314} fnstr;
315
316static fnstr *exclude_from = NULL;
317static fnstr *exclude_from_last = NULL;
318static fnstr *include_from = NULL;
319static fnstr *include_from_last = NULL;
320
321static fnstr *file_lists = NULL;
322static fnstr *file_lists_last = NULL;
323static fnstr *pattern_files = NULL;
324static fnstr *pattern_files_last = NULL;
325
326/* Structure for holding the two variables that describe a file name chain. */
327
328typedef struct fndatastr {
329 fnstr **anchor;
330 fnstr **lastptr;
331} fndatastr;
332
333static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
334static fndatastr include_from_data = { &include_from, &include_from_last };
335static fndatastr file_lists_data = { &file_lists, &file_lists_last };
336static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
337
338/* Structure for pattern and its compiled form; used for matching patterns and
339also for include/exclude patterns. */
340
341typedef struct patstr {
342 struct patstr *next;
343 char *string;
344 PCRE2_SIZE length;
345 pcre2_code *compiled;
346} patstr;
347
348static patstr *patterns = NULL;
349static patstr *patterns_last = NULL;
350static patstr *include_patterns = NULL;
351static patstr *include_patterns_last = NULL;
352static patstr *exclude_patterns = NULL;
353static patstr *exclude_patterns_last = NULL;
354static patstr *include_dir_patterns = NULL;
355static patstr *include_dir_patterns_last = NULL;
356static patstr *exclude_dir_patterns = NULL;
357static patstr *exclude_dir_patterns_last = NULL;
358
359/* Structure holding the two variables that describe a pattern chain. A pointer
360to such structures is used for each appropriate option. */
361
362typedef struct patdatastr {
363 patstr **anchor;
364 patstr **lastptr;
365} patdatastr;
366
367static patdatastr match_patdata = { &patterns, &patterns_last };
368static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
369static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
370static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
371static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
372
373static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
374 &include_dir_patterns, &exclude_dir_patterns };
375
376static const char *incexname[4] = { "--include", "--exclude",
377 "--include-dir", "--exclude-dir" };
378
379/* Structure for options and list of them */
380
381enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
382 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
383
384typedef struct option_item {
385 int type;
386 int one_char;
387 void *dataptr;
388 const char *long_name;
389 const char *help_text;
390} option_item;
391
392/* Options without a single-letter equivalent get a negative value. This can be
393used to identify them. */
394
395#define N_COLOUR (-1)
396#define N_EXCLUDE (-2)
397#define N_EXCLUDE_DIR (-3)
398#define N_HELP (-4)
399#define N_INCLUDE (-5)
400#define N_INCLUDE_DIR (-6)
401#define N_LABEL (-7)
402#define N_LOCALE (-8)
403#define N_NULL (-9)
404#define N_LOFFSETS (-10)
405#define N_FOFFSETS (-11)
406#define N_LBUFFER (-12)
407#define N_H_LIMIT (-13)
408#define N_M_LIMIT (-14)
409#define N_M_LIMIT_DEP (-15)
410#define N_BUFSIZE (-16)
411#define N_NOJIT (-17)
412#define N_FILE_LIST (-18)
413#define N_BINARY_FILES (-19)
414#define N_EXCLUDE_FROM (-20)
415#define N_INCLUDE_FROM (-21)
416#define N_OM_SEPARATOR (-22)
417#define N_MAX_BUFSIZE (-23)
418#define N_OM_CAPTURE (-24)
419#define N_ALLABSK (-25)
420
421static option_item optionlist[] = {
422 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
423 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
424 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
425 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
426 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
427 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
428 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
429 { OP_NUMBER, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
430 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
431 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
432 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
433 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
434 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
435 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
436 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
437 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
438 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
439 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
440 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
441 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
442 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
443 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
444 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
445 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
446 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
447 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
448 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
449 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
450 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
451 { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
452 { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
453 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
454 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
455 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
456 { OP_NUMBER, 'm', &count_limit, "max-count=number", "stop after <number> matched lines" },
457 { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
458 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
459#ifdef SUPPORT_PCRE2GREP_JIT
460 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
461#else
462 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
463#endif
464 { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
465 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
466 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
467 { OP_U32NUMBER, N_OM_CAPTURE, &capture_max, "om-capture=n", "set capture count for --only-matching" },
468 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
469 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
470 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
471 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
472 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
473 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
474 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
475 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
Elliott Hughes5b808042021-10-01 10:56:10 -0700476 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
477 { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" },
478 { OP_NODATA, 'u', NULL, "utf", "use UTF mode" },
479 { OP_NODATA, 'U', NULL, "utf-allow-invalid", "use UTF mode, allow for invalid code units" },
480 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
481 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
482 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
483 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
484 { OP_NODATA, N_ALLABSK, NULL, "allow-lookaround-bsk", "allow \\K in lookarounds" },
485 { OP_NODATA, 0, NULL, NULL, NULL }
486};
487
488/* Table of names for newline types. Must be kept in step with the definitions
489of PCRE2_NEWLINE_xx in pcre2.h. */
490
491static const char *newlines[] = {
492 "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
493
494/* UTF-8 tables */
495
496const int utf8_table1[] =
497 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
498const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);
499
500const int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
501const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
502
503const char utf8_table4[] = {
504 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
505 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
506 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
507 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
508
509
510#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
511/*************************************************
512* Emulated memmove() for systems without it *
513*************************************************/
514
515/* This function can make use of bcopy() if it is available. Otherwise do it by
516steam, as there are some non-Unix environments that lack both memmove() and
517bcopy(). */
518
static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
/* bcopy() copes with overlapping regions; note its (source, destination)
argument order. */
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t remaining = n;

if (to > from)
  {
  /* The destination lies above the source, so copy from the top downwards to
  avoid overwriting source bytes that have not yet been moved. */
  to += n;
  from += n;
  while (remaining-- > 0) *(--to) = *(--from);
  }
else
  {
  /* The destination is at or below the source: a simple ascending copy is
  safe. */
  while (remaining-- > 0) *to++ = *from++;
  }

/* Like memmove(), yield the start of the destination area. */
return d;
#endif /* not HAVE_BCOPY */
}
543#undef memmove
544#define memmove(d,s,n) emulated_memmove(d,s,n)
545#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
546
547
548
549/*************************************************
550* Convert code point to UTF-8 *
551*************************************************/
552
553/* A static buffer is used. Returns the number of bytes. */
554
555static int
556ord2utf8(uint32_t value)
557{
558int i, j;
559uint8_t *utf8bytes = utf8_buffer;
560for (i = 0; i < utf8_table1_size; i++)
561 if (value <= (uint32_t)utf8_table1[i]) break;
562utf8bytes += i;
563for (j = i; j > 0; j--)
564 {
565 *utf8bytes-- = 0x80 | (value & 0x3f);
566 value >>= 6;
567 }
568*utf8bytes = utf8_table2[i] | value;
569return i + 1;
570}
571
572
573
574/*************************************************
575* Case-independent string compare *
576*************************************************/
577
/* Compare two zero-terminated strings, ignoring case (as defined by tolower()
in the current locale).

Arguments:
  str1      first string
  str2      second string

Returns:    0 if the strings are equal when case is ignored
           -1 if str1 sorts before str2
           +1 if str1 sorts after str2
*/

static int
strcmpic(const char *str1, const char *str2)
{
/* The <ctype.h> functions require an argument that is representable as an
unsigned char (or EOF). Plain char may be signed, so the strings are scanned
as unsigned bytes to avoid undefined behaviour for bytes >= 0x80. */

const unsigned char *p1 = (const unsigned char *)str1;
const unsigned char *p2 = (const unsigned char *)str2;

while (*p1 != '\0' || *p2 != '\0')
  {
  int c1 = tolower(*p1++);
  int c2 = tolower(*p2++);
  if (c1 != c2) return (c1 > c2)? 1 : -1;
  }
return 0;
}
590
591
592/*************************************************
593* Parse GREP_COLORS *
594*************************************************/
595
596/* Extract ms or mt from GREP_COLORS.
597
598Argument: the string, possibly NULL
599Returns: the value of ms or mt, or NULL if neither present
600*/
601
static char *
parse_grep_colors(const char *gc)
{
static char seq[16];   /* Result buffer, overwritten by each call */
const char *src;
size_t used = 0;

if (gc == NULL) return NULL;

/* "ms=" takes precedence over "mt=" when both are present. */

src = strstr(gc, "ms=");
if (src == NULL) src = strstr(gc, "mt=");
if (src == NULL) return NULL;

/* Copy the value that follows the '=' up to the next colon or the end of the
string, silently truncating it if it does not fit in the buffer. */

for (src += 3; *src != ':' && *src != 0 && used < sizeof(seq) - 1; src++)
  seq[used++] = *src;
seq[used] = 0;
return seq;
}
619
620
621/*************************************************
622* Exit from the program *
623*************************************************/
624
625/* If there has been a resource error, give a suitable message.
626
627Argument: the return code
628Returns: does not return
629*/
630
631static void
632pcre2grep_exit(int rc)
633{
634/* VMS does exit codes differently: both exit(1) and exit(0) return with a
635status of 1, which is not helpful. To help with this problem, define a symbol
636(akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
637therein. */
638
639#ifdef __VMS
640 char val_buf[4];
641 $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
642 $DESCRIPTOR(sym_val, val_buf);
643 sprintf(val_buf, "%d", rc);
644 sym_val.dsc$w_length = strlen(val_buf);
645 lib$set_symbol(&sym_nam, &sym_val);
646#endif
647
648if (resource_error)
649 {
650 fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
651 "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
652 PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
653 fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
654 }
655exit(rc);
656}
657
658
659/*************************************************
660* Add item to chain of patterns *
661*************************************************/
662
663/* Used to add an item onto a chain, or just return an unconnected item if the
664"after" argument is NULL.
665
666Arguments:
667 s pattern string to add
668 patlen length of pattern
669 after if not NULL points to item to insert after
670
671Returns: new pattern block or NULL on error
672*/
673
674static patstr *
675add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
676{
677patstr *p = (patstr *)malloc(sizeof(patstr));
678if (p == NULL)
679 {
680 fprintf(stderr, "pcre2grep: malloc failed\n");
681 pcre2grep_exit(2);
682 }
683if (patlen > MAXPATLEN)
684 {
685 fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
686 MAXPATLEN);
687 free(p);
688 return NULL;
689 }
690p->next = NULL;
691p->string = s;
692p->length = patlen;
693p->compiled = NULL;
694
695if (after != NULL)
696 {
697 p->next = after->next;
698 after->next = p;
699 }
700return p;
701}
702
703
704/*************************************************
705* Free chain of patterns *
706*************************************************/
707
708/* Used for several chains of patterns.
709
710Argument: pointer to start of chain
711Returns: nothing
712*/
713
714static void
715free_pattern_chain(patstr *pc)
716{
717while (pc != NULL)
718 {
719 patstr *p = pc;
720 pc = p->next;
721 if (p->compiled != NULL) pcre2_code_free(p->compiled);
722 free(p);
723 }
724}
725
726
727/*************************************************
728* Free chain of file names *
729*************************************************/
730
731/*
732Argument: pointer to start of chain
733Returns: nothing
734*/
735
736static void
737free_file_chain(fnstr *fn)
738{
739while (fn != NULL)
740 {
741 fnstr *f = fn;
742 fn = f->next;
743 free(f);
744 }
745}
746
747
748/*************************************************
749* OS-specific functions *
750*************************************************/
751
752/* These definitions are needed in all Windows environments, even those where
753Unix-style directory scanning can be used (see below). */
754
755#ifdef WIN32
756
757#ifndef STRICT
758# define STRICT
759#endif
760#ifndef WIN32_LEAN_AND_MEAN
761# define WIN32_LEAN_AND_MEAN
762#endif
763
764#include <windows.h>
765
766#define iswild(name) (strpbrk(name, "*?") != NULL)
767
768/* Convert ANSI BGR format to RGB used by Windows */
769#define BGR_RGB(x) ((x & 1 ? 4 : 0) | (x & 2) | (x & 4 ? 1 : 0))
770
771static HANDLE hstdout;
772static CONSOLE_SCREEN_BUFFER_INFO csbi;
773static WORD match_colour;
774
775static WORD
776decode_ANSI_colour(const char *cs)
777{
778WORD result = csbi.wAttributes;
779while (*cs)
780 {
781 if (isdigit(*cs))
782 {
783 int code = atoi(cs);
784 if (code == 1) result |= 0x08;
785 else if (code == 4) result |= 0x8000;
786 else if (code == 5) result |= 0x80;
787 else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
788 else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
789 else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
790 else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
791 /* aixterm high intensity colour codes */
792 else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
793 else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
794
795 while (isdigit(*cs)) cs++;
796 }
797 if (*cs) cs++;
798 }
799return result;
800}
801
802
803static void
804init_colour_output()
805{
806if (do_colour)
807 {
808 hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
809 /* This fails when redirected to con; try again if so. */
810 if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
811 {
812 HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
813 FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
814 GetConsoleScreenBufferInfo(hcon, &csbi);
815 CloseHandle(hcon);
816 }
817 match_colour = decode_ANSI_colour(colour_string);
818 /* No valid colour found - turn off colouring */
819 if (!match_colour) do_colour = FALSE;
820 }
821}
822
823#endif /* WIN32 */
824
825
826/* The following sets of functions are defined so that they can be made system
827specific. At present there are versions for Unix-style environments, Windows,
828native z/OS, and "no support". */
829
830
831/************* Directory scanning Unix-style and z/OS ***********/
832
833#if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
834#include <sys/types.h>
835#include <sys/stat.h>
836#include <dirent.h>
837
838#if defined NATIVE_ZOS
839/************* Directory and PDS/E scanning for z/OS ***********/
840/************* z/OS looks mostly like Unix with USS ************/
841/* However, z/OS needs the #include statements in this header */
842#include "pcrzosfs.h"
843/* That header is not included in the main PCRE distribution because
844 other apparatus is needed to compile pcre2grep for z/OS. The header
845 can be found in the special z/OS distribution, which is available
846 from www.zaconsultants.net or from www.cbttape.org. */
847#endif
848
849typedef DIR directory_type;
850#define FILESEP '/'
851
/* Test whether a path names a directory. Yields zero if stat() fails, in the
expectation that a later attempt to open the path as a file will then fail
and be reported. */

static int
isdirectory(char *filename)
{
struct stat info;
return (stat(filename, &info) < 0)? 0 : S_ISDIR(info.st_mode);
}
860
861static directory_type *
862opendirectory(char *filename)
863{
864return opendir(filename);
865}
866
867static char *
868readdirectory(directory_type *dir)
869{
870for (;;)
871 {
872 struct dirent *dent = readdir(dir);
873 if (dent == NULL) return NULL;
874 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
875 return dent->d_name;
876 }
877/* Control never reaches here */
878}
879
880static void
881closedirectory(directory_type *dir)
882{
883closedir(dir);
884}
885
886
887/************* Test for regular file, Unix-style **********/
888
/* Test whether a path names a regular file. If stat() fails the answer is 1,
in the expectation that the later attempt to open the path as a file will
fail and be reported then. */

static int
isregfile(char *filename)
{
struct stat info;
return (stat(filename, &info) < 0)? 1 : S_ISREG(info.st_mode);
}
897
898
899#if defined NATIVE_ZOS
900/************* Test for a terminal in z/OS **********/
901/* isatty() does not work in a TSO environment, so always give FALSE.*/
902
903static BOOL
904is_stdout_tty(void)
905{
906return FALSE;
907}
908
909static BOOL
910is_file_tty(FILE *f)
911{
912return FALSE;
913}
914
915
916/************* Test for a terminal, Unix-style **********/
917
918#else
919static BOOL
920is_stdout_tty(void)
921{
922return isatty(fileno(stdout));
923}
924
925static BOOL
926is_file_tty(FILE *f)
927{
928return isatty(fileno(f));
929}
930#endif
931
932
933/************* Print optionally coloured match Unix-style and z/OS **********/
934
935static void
936print_match(const void *buf, int length)
937{
938if (length == 0) return;
939if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
940FWRITE_IGNORE(buf, 1, length, stdout);
941if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
942}
943
944/* End of Unix-style or native z/OS environment functions. */
945
946
947/************* Directory scanning in Windows ***********/
948
949/* I (Philip Hazel) have no means of testing this code. It was contributed by
950Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
951when it did not exist. David Byron added a patch that moved the #include of
952<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
953*/
954
955#elif defined WIN32
956
957#ifndef INVALID_FILE_ATTRIBUTES
958#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
959#endif
960
961typedef struct directory_type
962{
963HANDLE handle;
964BOOL first;
965WIN32_FIND_DATA data;
966} directory_type;
967
968#define FILESEP '/'
969
970int
971isdirectory(char *filename)
972{
973DWORD attr = GetFileAttributes(filename);
974if (attr == INVALID_FILE_ATTRIBUTES)
975 return 0;
976return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
977}
978
979directory_type *
980opendirectory(char *filename)
981{
982size_t len;
983char *pattern;
984directory_type *dir;
985DWORD err;
986len = strlen(filename);
987pattern = (char *)malloc(len + 3);
988dir = (directory_type *)malloc(sizeof(*dir));
989if ((pattern == NULL) || (dir == NULL))
990 {
991 fprintf(stderr, "pcre2grep: malloc failed\n");
992 pcre2grep_exit(2);
993 }
994memcpy(pattern, filename, len);
995if (iswild(filename))
996 pattern[len] = 0;
997else
998 memcpy(&(pattern[len]), "\\*", 3);
999dir->handle = FindFirstFile(pattern, &(dir->data));
1000if (dir->handle != INVALID_HANDLE_VALUE)
1001 {
1002 free(pattern);
1003 dir->first = TRUE;
1004 return dir;
1005 }
1006err = GetLastError();
1007free(pattern);
1008free(dir);
1009errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
1010return NULL;
1011}
1012
1013char *
1014readdirectory(directory_type *dir)
1015{
1016for (;;)
1017 {
1018 if (!dir->first)
1019 {
1020 if (!FindNextFile(dir->handle, &(dir->data)))
1021 return NULL;
1022 }
1023 else
1024 {
1025 dir->first = FALSE;
1026 }
1027 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
1028 return dir->data.cFileName;
1029 }
1030#ifndef _MSC_VER
1031return NULL; /* Keep compiler happy; never executed */
1032#endif
1033}
1034
1035void
1036closedirectory(directory_type *dir)
1037{
1038FindClose(dir->handle);
1039free(dir);
1040}
1041
1042
1043/************* Test for regular file in Windows **********/
1044
1045/* I don't know how to do this, or if it can be done; assume all paths are
1046regular if they are not directories. */
1047
int
isregfile(char *filename)
{
/* Anything that is not a directory is taken to be a regular file. */
return isdirectory(filename) == 0;
}
1052
1053
1054/************* Test for a terminal in Windows **********/
1055
1056static BOOL
1057is_stdout_tty(void)
1058{
1059return _isatty(_fileno(stdout));
1060}
1061
1062static BOOL
1063is_file_tty(FILE *f)
1064{
1065return _isatty(_fileno(f));
1066}
1067
1068
1069/************* Print optionally coloured match in Windows **********/
1070
1071static void
1072print_match(const void *buf, int length)
1073{
1074if (length == 0) return;
1075if (do_colour)
1076 {
1077 if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1078 else SetConsoleTextAttribute(hstdout, match_colour);
1079 }
1080FWRITE_IGNORE(buf, 1, length, stdout);
1081if (do_colour)
1082 {
1083 if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1084 else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1085 }
1086}
1087
1088/* End of Windows functions */
1089
1090
1091/************* Directory scanning when we can't do it ***********/
1092
1093/* The type is void, and apart from isdirectory(), the functions do nothing. */
1094
1095#else
1096
#define FILESEP 0
typedef void directory_type;

/* Nothing is ever reported to be a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* Opening always fails. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return NULL;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return NULL;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
1104
1105
1106/************* Test for regular file when we can't do it **********/
1107
1108/* Assume all files are regular. */
1109
int
isregfile(char *filename)
{
(void)filename;
return 1;
}
1111
1112
1113/************* Test for a terminal when we can't do it **********/
1114
1115static BOOL
1116is_stdout_tty(void)
1117{
1118return FALSE;
1119}
1120
1121static BOOL
1122is_file_tty(FILE *f)
1123{
1124return FALSE;
1125}
1126
1127
1128/************* Print optionally coloured match when we can't do it **********/
1129
/* Write length bytes from buf to stdout, with no colouring. */

static void
print_match(const void *buf, int length)
{
if (length != 0)
  {
  FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1136
1137#endif /* End of system-specific functions */
1138
1139
1140
1141#ifndef HAVE_STRERROR
1142/*************************************************
1143* Provide strerror() for non-ANSI libraries *
1144*************************************************/
1145
1146/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1147in their libraries, but can provide the same facility by this simple
1148alternative function. */
1149
extern int sys_nerr;
extern char *sys_errlist[];

/* Look the number up in sys_errlist[]; a number outside 0..sys_nerr-1 gets a
fixed text instead. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1159#endif /* HAVE_STRERROR */
1160
1161
1162
1163/*************************************************
1164* Usage function *
1165*************************************************/
1166
1167static int
1168usage(int rc)
1169{
1170option_item *op;
1171fprintf(stderr, "Usage: pcre2grep [-");
1172for (op = optionlist; op->one_char != 0; op++)
1173 {
1174 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1175 }
1176fprintf(stderr, "] [long options] [pattern] [files]\n");
1177fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1178 "options.\n");
1179return rc;
1180}
1181
1182
1183
1184/*************************************************
1185* Help function *
1186*************************************************/
1187
1188static void
1189help(void)
1190{
1191option_item *op;
1192
1193printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1194printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1195printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1196
1197#ifdef SUPPORT_PCRE2GREP_CALLOUT
1198#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1199printf("All callout scripts in patterns are supported." STDOUT_NL);
1200#else
1201printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1202#endif
1203#else
1204printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1205#endif
1206
1207printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1208
1209#ifdef SUPPORT_LIBZ
1210printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1211#endif
1212
1213#ifdef SUPPORT_LIBBZ2
1214printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1215#endif
1216
1217#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1218printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1219#else
1220printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1221#endif
1222
1223printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1224printf("Options:" STDOUT_NL);
1225
1226for (op = optionlist; op->one_char != 0; op++)
1227 {
1228 int n;
1229 char s[4];
1230
1231 if (op->one_char > 0 && (op->long_name)[0] == 0)
1232 n = 31 - printf(" -%c", op->one_char);
1233 else
1234 {
1235 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1236 else strcpy(s, " ");
1237 n = 31 - printf(" %s --%s", s, op->long_name);
1238 }
1239
1240 if (n < 1) n = 1;
1241 printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
1242 }
1243
1244printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1245printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1246printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1247printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1248printf("space is removed and blank lines are ignored." STDOUT_NL);
1249printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1250
1251printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1252printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1253}
1254
1255
1256
1257/*************************************************
1258* Test exclude/includes *
1259*************************************************/
1260
1261/* If any exclude pattern matches, the path is excluded. Otherwise, unless
1262there are no includes, the path must match an include pattern.
1263
1264Arguments:
1265 path the path to be matched
1266 ip the chain of include patterns
1267 ep the chain of exclude patterns
1268
1269Returns: TRUE if the path is not excluded
1270*/
1271
1272static BOOL
1273test_incexc(char *path, patstr *ip, patstr *ep)
1274{
1275int plen = strlen((const char *)path);
1276
1277for (; ep != NULL; ep = ep->next)
1278 {
1279 if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1280 return FALSE;
1281 }
1282
1283if (ip == NULL) return TRUE;
1284
1285for (; ip != NULL; ip = ip->next)
1286 {
1287 if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1288 return TRUE;
1289 }
1290
1291return FALSE;
1292}
1293
1294
1295
1296/*************************************************
1297* Decode integer argument value *
1298*************************************************/
1299
1300/* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1301because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1302just keep it simple.
1303
1304Arguments:
1305 option_data the option data string
1306 op the option item (for error messages)
1307 longop TRUE if option given in long form
1308
1309Returns: a long integer
1310*/
1311
1312static long int
1313decode_number(char *option_data, option_item *op, BOOL longop)
1314{
1315unsigned long int n = 0;
1316char *endptr = option_data;
1317while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1318while (isdigit((unsigned char)(*endptr)))
1319 n = n * 10 + (int)(*endptr++ - '0');
1320if (toupper(*endptr) == 'K')
1321 {
1322 n *= 1024;
1323 endptr++;
1324 }
1325else if (toupper(*endptr) == 'M')
1326 {
1327 n *= 1024*1024;
1328 endptr++;
1329 }
1330
1331if (*endptr != 0) /* Error */
1332 {
1333 if (longop)
1334 {
1335 char *equals = strchr(op->long_name, '=');
1336 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1337 (int)(equals - op->long_name);
1338 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1339 option_data, nlen, op->long_name);
1340 }
1341 else
1342 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1343 option_data, op->one_char);
1344 pcre2grep_exit(usage(2));
1345 }
1346
1347return n;
1348}
1349
1350
1351
1352/*************************************************
1353* Add item to a chain of numbers *
1354*************************************************/
1355
1356/* Used to add an item onto a chain, or just return an unconnected item if the
1357"after" argument is NULL.
1358
1359Arguments:
1360 n the number to add
1361 after if not NULL points to item to insert after
1362
1363Returns: new number block
1364*/
1365
1366static omstr *
1367add_number(int n, omstr *after)
1368{
1369omstr *om = (omstr *)malloc(sizeof(omstr));
1370
1371if (om == NULL)
1372 {
1373 fprintf(stderr, "pcre2grep: malloc failed\n");
1374 pcre2grep_exit(2);
1375 }
1376om->next = NULL;
1377om->groupnum = n;
1378
1379if (after != NULL)
1380 {
1381 om->next = after->next;
1382 after->next = om;
1383 }
1384return om;
1385}
1386
1387
1388
1389/*************************************************
1390* Read one line of input *
1391*************************************************/
1392
1393/* Normally, input that is to be scanned is read using fread() (or gzread, or
1394BZ2_read) into a large buffer, so many lines may be read at once. However,
1395doing this for tty input means that no output appears until a lot of input has
1396been typed. Instead, tty input is handled line by line. We cannot use fgets()
1397for this, because it does not stop at a binary zero, and therefore there is no
1398way of telling how many characters it has read, because there may be binary
1399zeros embedded in the data. This function is also used for reading patterns
1400from files (the -f option).
1401
1402Arguments:
1403 buffer the buffer to read into
1404 length the maximum number of characters to read
1405 f the file
1406
1407Returns: the number of characters read, zero at end of file
1408*/
1409
/* Read bytes from f into buffer until a newline has been stored, length bytes
have been stored, or the end of the file is reached. Binary zeros are treated
as ordinary data. The count of stored bytes is returned; no terminating zero is
added. The space left is tested before each byte is read, so nothing is read or
stored when length is zero or negative. */

static PCRE2_SIZE
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }

return yield;
}
1422
1423
1424
1425/*************************************************
1426* Find end of line *
1427*************************************************/
1428
1429/* The length of the endline sequence that is found is set via lenptr. This may
1430be zero at the very end of the file if there is no line-ending sequence there.
1431
1432Arguments:
1433 p current position in line
1434 endptr end of available data
1435 lenptr where to put the length of the eol sequence
1436
1437Returns: pointer after the last byte of the line,
1438 including the newline byte(s)
1439*/
1440
1441static char *
1442end_of_line(char *p, char *endptr, int *lenptr)
1443{
1444switch(endlinetype)
1445 {
1446 default: /* Just in case */
1447 case PCRE2_NEWLINE_LF:
1448 while (p < endptr && *p != '\n') p++;
1449 if (p < endptr)
1450 {
1451 *lenptr = 1;
1452 return p + 1;
1453 }
1454 *lenptr = 0;
1455 return endptr;
1456
1457 case PCRE2_NEWLINE_CR:
1458 while (p < endptr && *p != '\r') p++;
1459 if (p < endptr)
1460 {
1461 *lenptr = 1;
1462 return p + 1;
1463 }
1464 *lenptr = 0;
1465 return endptr;
1466
1467 case PCRE2_NEWLINE_NUL:
1468 while (p < endptr && *p != '\0') p++;
1469 if (p < endptr)
1470 {
1471 *lenptr = 1;
1472 return p + 1;
1473 }
1474 *lenptr = 0;
1475 return endptr;
1476
1477 case PCRE2_NEWLINE_CRLF:
1478 for (;;)
1479 {
1480 while (p < endptr && *p != '\r') p++;
1481 if (++p >= endptr)
1482 {
1483 *lenptr = 0;
1484 return endptr;
1485 }
1486 if (*p == '\n')
1487 {
1488 *lenptr = 2;
1489 return p + 1;
1490 }
1491 }
1492 break;
1493
1494 case PCRE2_NEWLINE_ANYCRLF:
1495 while (p < endptr)
1496 {
1497 int extra = 0;
1498 int c = *((unsigned char *)p);
1499
1500 if (utf && c >= 0xc0)
1501 {
1502 int gcii, gcss;
1503 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1504 gcss = 6*extra;
1505 c = (c & utf8_table3[extra]) << gcss;
1506 for (gcii = 1; gcii <= extra; gcii++)
1507 {
1508 gcss -= 6;
1509 c |= (p[gcii] & 0x3f) << gcss;
1510 }
1511 }
1512
1513 p += 1 + extra;
1514
1515 switch (c)
1516 {
1517 case '\n':
1518 *lenptr = 1;
1519 return p;
1520
1521 case '\r':
1522 if (p < endptr && *p == '\n')
1523 {
1524 *lenptr = 2;
1525 p++;
1526 }
1527 else *lenptr = 1;
1528 return p;
1529
1530 default:
1531 break;
1532 }
1533 } /* End of loop for ANYCRLF case */
1534
1535 *lenptr = 0; /* Must have hit the end */
1536 return endptr;
1537
1538 case PCRE2_NEWLINE_ANY:
1539 while (p < endptr)
1540 {
1541 int extra = 0;
1542 int c = *((unsigned char *)p);
1543
1544 if (utf && c >= 0xc0)
1545 {
1546 int gcii, gcss;
1547 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1548 gcss = 6*extra;
1549 c = (c & utf8_table3[extra]) << gcss;
1550 for (gcii = 1; gcii <= extra; gcii++)
1551 {
1552 gcss -= 6;
1553 c |= (p[gcii] & 0x3f) << gcss;
1554 }
1555 }
1556
1557 p += 1 + extra;
1558
1559 switch (c)
1560 {
1561 case '\n': /* LF */
1562 case '\v': /* VT */
1563 case '\f': /* FF */
1564 *lenptr = 1;
1565 return p;
1566
1567 case '\r': /* CR */
1568 if (p < endptr && *p == '\n')
1569 {
1570 *lenptr = 2;
1571 p++;
1572 }
1573 else *lenptr = 1;
1574 return p;
1575
1576#ifndef EBCDIC
1577 case 0x85: /* Unicode NEL */
1578 *lenptr = utf? 2 : 1;
1579 return p;
1580
1581 case 0x2028: /* Unicode LS */
1582 case 0x2029: /* Unicode PS */
1583 *lenptr = 3;
1584 return p;
1585#endif /* Not EBCDIC */
1586
1587 default:
1588 break;
1589 }
1590 } /* End of loop for ANY case */
1591
1592 *lenptr = 0; /* Must have hit the end */
1593 return endptr;
1594 } /* End of overall switch */
1595}
1596
1597
1598
1599/*************************************************
1600* Find start of previous line *
1601*************************************************/
1602
1603/* This is called when looking back for before lines to print.
1604
1605Arguments:
1606 p start of the subsequent line
1607 startptr start of available data
1608
1609Returns: pointer to the start of the previous line
1610*/
1611
1612static char *
1613previous_line(char *p, char *startptr)
1614{
1615switch(endlinetype)
1616 {
1617 default: /* Just in case */
1618 case PCRE2_NEWLINE_LF:
1619 p--;
1620 while (p > startptr && p[-1] != '\n') p--;
1621 return p;
1622
1623 case PCRE2_NEWLINE_CR:
1624 p--;
1625 while (p > startptr && p[-1] != '\n') p--;
1626 return p;
1627
1628 case PCRE2_NEWLINE_NUL:
1629 p--;
1630 while (p > startptr && p[-1] != '\0') p--;
1631 return p;
1632
1633 case PCRE2_NEWLINE_CRLF:
1634 for (;;)
1635 {
1636 p -= 2;
1637 while (p > startptr && p[-1] != '\n') p--;
1638 if (p <= startptr + 1 || p[-2] == '\r') return p;
1639 }
1640 /* Control can never get here */
1641
1642 case PCRE2_NEWLINE_ANY:
1643 case PCRE2_NEWLINE_ANYCRLF:
1644 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1645 if (utf) while ((*p & 0xc0) == 0x80) p--;
1646
1647 while (p > startptr)
1648 {
1649 unsigned int c;
1650 char *pp = p - 1;
1651
1652 if (utf)
1653 {
1654 int extra = 0;
1655 while ((*pp & 0xc0) == 0x80) pp--;
1656 c = *((unsigned char *)pp);
1657 if (c >= 0xc0)
1658 {
1659 int gcii, gcss;
1660 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1661 gcss = 6*extra;
1662 c = (c & utf8_table3[extra]) << gcss;
1663 for (gcii = 1; gcii <= extra; gcii++)
1664 {
1665 gcss -= 6;
1666 c |= (pp[gcii] & 0x3f) << gcss;
1667 }
1668 }
1669 }
1670 else c = *((unsigned char *)pp);
1671
1672 if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1673 {
1674 case '\n': /* LF */
1675 case '\r': /* CR */
1676 return p;
1677
1678 default:
1679 break;
1680 }
1681
1682 else switch (c)
1683 {
1684 case '\n': /* LF */
1685 case '\v': /* VT */
1686 case '\f': /* FF */
1687 case '\r': /* CR */
1688#ifndef EBCDIC
1689 case 0x85: /* Unicode NEL */
1690 case 0x2028: /* Unicode LS */
1691 case 0x2029: /* Unicode PS */
1692#endif /* Not EBCDIC */
1693 return p;
1694
1695 default:
1696 break;
1697 }
1698
1699 p = pp; /* Back one character */
1700 } /* End of loop for ANY case */
1701
1702 return startptr; /* Hit start of data */
1703 } /* End of overall switch */
1704}
1705
1706
1707
1708/*************************************************
1709* Output newline at end *
1710*************************************************/
1711
1712/* This function is called if the final line of a file has been written to
1713stdout, but it does not have a terminating newline.
1714
1715Arguments: none
1716Returns: nothing
1717*/
1718
1719static void
1720write_final_newline(void)
1721{
1722switch(endlinetype)
1723 {
1724 default: /* Just in case */
1725 case PCRE2_NEWLINE_LF:
1726 case PCRE2_NEWLINE_ANY:
1727 case PCRE2_NEWLINE_ANYCRLF:
1728 fprintf(stdout, "\n");
1729 break;
1730
1731 case PCRE2_NEWLINE_CR:
1732 fprintf(stdout, "\r");
1733 break;
1734
1735 case PCRE2_NEWLINE_CRLF:
1736 fprintf(stdout, "\r\n");
1737 break;
1738
1739 case PCRE2_NEWLINE_NUL:
1740 fprintf(stdout, "%c", 0);
1741 break;
1742 }
1743}
1744
1745
1746/*************************************************
1747* Print the previous "after" lines *
1748*************************************************/
1749
1750/* This is called if we are about to lose said lines because of buffer filling,
1751and at the end of the file. The data in the line is written using fwrite() so
1752that a binary zero does not terminate it.
1753
1754Arguments:
1755 lastmatchnumber the number of the last matching line, plus one
1756 lastmatchrestart where we restarted after the last match
1757 endptr end of available data
1758 printname filename for printing
1759
1760Returns: nothing
1761*/
1762
1763static void
1764do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1765 char *endptr, const char *printname)
1766{
1767if (after_context > 0 && lastmatchnumber > 0)
1768 {
1769 int count = 0;
1770 int ellength = 0;
1771 while (lastmatchrestart < endptr && count < after_context)
1772 {
1773 char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1774 if (ellength == 0 && pp == main_buffer + bufsize) break;
1775 if (printname != NULL) fprintf(stdout, "%s-", printname);
1776 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1777 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1778 lastmatchrestart = pp;
1779 count++;
1780 }
1781
1782 /* If we have printed any lines, arrange for a hyphen separator if anything
1783 else follows. Also, if the last line is the final line in the file and it had
1784 no newline, add one. */
1785
1786 if (count > 0)
1787 {
1788 hyphenpending = TRUE;
1789 if (ellength == 0 && lastmatchrestart >= endptr)
1790 write_final_newline();
1791 }
1792 }
1793}
1794
1795
1796
1797/*************************************************
1798* Apply patterns to subject till one matches *
1799*************************************************/
1800
1801/* This function is called to run through all patterns, looking for a match. It
1802is used multiple times for the same subject when colouring is enabled, in order
1803to find all possible matches.
1804
1805Arguments:
1806 matchptr the start of the subject
1807 length the length of the subject to match
  options     options for pcre2_match
  startoffset where to start matching
1810 mrc address of where to put the result of pcre2_match()
1811
1812Returns: TRUE if there was a match
1813 FALSE if there was no match
1814 invert if there was a non-fatal error
1815*/
1816
1817static BOOL
1818match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1819 PCRE2_SIZE startoffset, int *mrc)
1820{
1821int i;
1822PCRE2_SIZE slen = length;
1823patstr *p = patterns;
1824const char *msg = "this text:\n\n";
1825
1826if (slen > 200)
1827 {
1828 slen = 200;
1829 msg = "text that starts:\n\n";
1830 }
1831
1832for (i = 1; p != NULL; p = p->next, i++)
1833 {
1834 *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1835 startoffset, options, match_data, match_context);
1836 if (*mrc >= 0) return TRUE;
1837 if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1838 fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1839 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1840 fprintf(stderr, "%s", msg);
1841 FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */
1842 fprintf(stderr, "\n\n");
1843 if (*mrc <= PCRE2_ERROR_UTF8_ERR1 &&
1844 *mrc >= PCRE2_ERROR_UTF8_ERR21)
1845 {
1846 unsigned char mbuffer[256];
1847 PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
1848 (void)pcre2_get_error_message(*mrc, mbuffer, sizeof(mbuffer));
Elliott Hughes16619d62021-10-29 12:10:38 -07001849 fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer, startchar);
Elliott Hughes5b808042021-10-01 10:56:10 -07001850 }
1851 if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
1852 *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1853 resource_error = TRUE;
1854 if (error_count++ > 20)
1855 {
1856 fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1857 pcre2grep_exit(2);
1858 }
1859 return invert; /* No more matching; don't show the line again */
1860 }
1861
1862return FALSE; /* No match, no errors */
1863}
1864
1865
1866
1867/*************************************************
1868* Decode dollar escape sequence *
1869*************************************************/
1870
1871/* Called from various places to decode $ escapes in output strings. The escape
1872sequences are as follows:
1873
1874$<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
1875zero is never returned; '0' is substituted.
1876
1877$a returns bell.
1878$b returns backspace.
1879$e returns escape.
1880$f returns form feed.
1881$n returns newline.
1882$r returns carriage return.
1883$t returns tab.
1884$v returns vertical tab.
1885$o<digits> returns the character represented by the given octal
1886 number; up to three digits are processed.
1887$o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
1888 code points.
1889$x<digits> returns the character represented by the given hexadecimal
1890 number; up to two digits are processed.
$x{<digits>} does the same, up to 6 digits, but gives an error for mode-invalid
1892 code points.
1893Any other character is substituted by itself. E.g: $$ is replaced by a single
1894dollar.
1895
1896Arguments:
1897 begin the start of the whole string
1898 string points to the $
1899 callout TRUE if in a callout (inhibits error messages)
1900 value where to return a value
1901 last where to return pointer to the last used character
1902
1903Returns: DDE_ERROR after a syntax error
1904 DDE_CAPTURE if *value is a capture number
1905 DDE_CHAR if *value is a character code
1906*/
1907
1908static int
1909decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
1910 uint32_t *value, PCRE2_SPTR *last)
1911{
1912uint32_t c = 0;
1913int base = 10;
1914int dcount;
1915int rc = DDE_CHAR;
1916BOOL brace = FALSE;
1917
1918switch (*(++string))
1919 {
1920 case 0: /* Syntax error: a character must be present after $. */
1921 if (!callout)
1922 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1923 (int)(string - begin), "no character after $");
1924 *last = string;
1925 return DDE_ERROR;
1926
1927 case '{':
1928 brace = TRUE;
1929 string++;
1930 if (!isdigit(*string)) /* Syntax error: a decimal number required. */
1931 {
1932 if (!callout)
1933 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1934 (int)(string - begin), "decimal number expected");
1935 rc = DDE_ERROR;
1936 break;
1937 }
1938
1939 /* Fall through */
1940
1941 /* The maximum capture number is 65535, so any number greater than that will
1942 always be an unknown capture number. We just stop incrementing, in order to
1943 avoid overflow. */
1944
1945 case '0': case '1': case '2': case '3': case '4':
1946 case '5': case '6': case '7': case '8': case '9':
1947 do
1948 {
1949 if (c <= 65535) c = c * 10 + (*string - '0');
1950 string++;
1951 }
1952 while (*string >= '0' && *string <= '9');
1953 string--; /* Point to last digit */
1954
1955 /* In a callout, capture number 0 is not available. No error can be given,
1956 so just return the character '0'. */
1957
1958 if (callout && c == 0)
1959 {
1960 *value = '0';
1961 }
1962 else
1963 {
1964 *value = c;
1965 rc = DDE_CAPTURE;
1966 }
1967 break;
1968
1969 /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
1970 for valid Unicode code points. */
1971
1972 case 'o':
1973 base = 8;
1974 string++;
1975 if (*string == '{')
1976 {
1977 brace = TRUE;
1978 string++;
1979 dcount = 7;
1980 }
1981 else dcount = 3;
1982 for (; dcount > 0; dcount--)
1983 {
1984 if (*string < '0' || *string > '7') break;
1985 c = c * 8 + (*string++ - '0');
1986 }
1987 *value = c;
1988 string--; /* Point to last digit */
1989 break;
1990
1991 /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
1992 for valid Unicode code points. */
1993
1994 case 'x':
1995 base = 16;
1996 string++;
1997 if (*string == '{')
1998 {
1999 brace = TRUE;
2000 string++;
2001 dcount = 6;
2002 }
2003 else dcount = 2;
2004 for (; dcount > 0; dcount--)
2005 {
2006 if (!isxdigit(*string)) break;
2007 if (*string >= '0' && *string <= '9')
2008 c = c *16 + *string++ - '0';
2009 else
2010 c = c * 16 + (*string++ | 0x20) - 'a' + 10;
2011 }
2012 *value = c;
2013 string--; /* Point to last digit */
2014 break;
2015
2016 case 'a': *value = '\a'; break;
2017 case 'b': *value = '\b'; break;
2018#ifndef EBCDIC
2019 case 'e': *value = '\033'; break;
2020#else
2021 case 'e': *value = '\047'; break;
2022#endif
2023 case 'f': *value = '\f'; break;
2024 case 'n': *value = STDOUT_NL_CODE; break;
2025 case 'r': *value = '\r'; break;
2026 case 't': *value = '\t'; break;
2027 case 'v': *value = '\v'; break;
2028
2029 default: *value = *string; break;
2030 }
2031
2032if (brace)
2033 {
2034 c = string[1];
2035 if (c != '}')
2036 {
2037 rc = DDE_ERROR;
2038 if (!callout)
2039 {
2040 if ((base == 8 && c >= '0' && c <= '7') ||
2041 (base == 16 && isxdigit(c)))
2042 {
2043 fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2044 "too many %s digits\n", (int)(string - begin),
2045 (base == 8)? "octal" : "hex");
2046 }
2047 else
2048 {
2049 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2050 (int)(string - begin), "missing closing brace");
2051 }
2052 }
2053 }
2054 else string++;
2055 }
2056
2057/* Check maximum code point values, but take note of STDOUT_NL_CODE. */
2058
2059if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
2060 {
2061 uint32_t max = utf? 0x0010ffffu : 0xffu;
2062 if (*value > max)
2063 {
2064 if (!callout)
2065 fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2066 "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
2067 rc = DDE_ERROR;
2068 }
2069 }
2070
2071*last = string;
2072return rc;
2073}
2074
2075
2076
2077/*************************************************
2078* Check output text for errors *
2079*************************************************/
2080
2081/* Called early, to get errors before doing anything for -O text; also called
2082from callouts to check before outputting.
2083
2084Arguments:
2085 string an --output text string
2086 callout TRUE if in a callout (stops printing errors)
2087
2088Returns: TRUE if OK, FALSE on error
2089*/
2090
2091static BOOL
2092syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
2093{
2094uint32_t value;
2095PCRE2_SPTR begin = string;
2096
2097for (; *string != 0; string++)
2098 {
2099 if (*string == '$' &&
2100 decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
2101 return FALSE;
2102 }
2103
2104return TRUE;
2105}
2106
2107
2108/*************************************************
2109* Display output text *
2110*************************************************/
2111
2112/* Display the output text, which is assumed to have already been syntax
2113checked. Output may contain escape sequences started by the dollar sign.
2114
2115Arguments:
2116 string: the output text
2117 callout: TRUE for the builtin callout, FALSE for --output
2118 subject the start of the subject
2119 ovector: capture offsets
2120 capture_top: number of captures
2121
2122Returns: TRUE if something was output, other than newline
2123 FALSE if nothing was output, or newline was last output
2124*/
2125
2126static BOOL
2127display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
2128 PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
2129{
2130uint32_t value;
2131BOOL printed = FALSE;
2132PCRE2_SPTR begin = string;
2133
2134for (; *string != 0; string++)
2135 {
2136 if (*string == '$')
2137 {
2138 switch(decode_dollar_escape(begin, string, callout, &value, &string))
2139 {
2140 case DDE_CHAR:
2141 if (value == STDOUT_NL_CODE)
2142 {
2143 fprintf(stdout, STDOUT_NL);
2144 printed = FALSE;
2145 continue;
2146 }
2147 break; /* Will print value */
2148
2149 case DDE_CAPTURE:
2150 if (value < capture_top)
2151 {
2152 PCRE2_SIZE capturesize;
2153 value *= 2;
2154 capturesize = ovector[value + 1] - ovector[value];
2155 if (capturesize > 0)
2156 {
2157 print_match(subject + ovector[value], capturesize);
2158 printed = TRUE;
2159 }
2160 }
2161 continue;
2162
2163 default: /* Should not occur */
2164 break;
2165 }
2166 }
2167
2168 else value = *string; /* Not a $ escape */
2169
2170 if (utf && value <= 127) fprintf(stdout, "%c", *string); else
2171 {
2172 int i;
2173 int n = ord2utf8(value);
2174 for (i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
2175 }
2176
2177 printed = TRUE;
2178 }
2179
2180return printed;
2181}
2182
2183
2184#ifdef SUPPORT_PCRE2GREP_CALLOUT
2185
2186/*************************************************
2187* Parse and execute callout scripts *
2188*************************************************/
2189
2190/* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2191string block and executes the program specified by the string. The string is a
2192list of substrings separated by pipe characters. The first substring represents
2193the executable name, and the following substrings specify the arguments:
2194
2195 program_name|param1|param2|...
2196
2197Any substring (including the program name) can contain escape sequences
2198started by the dollar character. The escape sequences are substituted as
2199follows:
2200
2201 $<digits> or ${<digits>} is replaced by the captured substring of the given
2202 decimal number, which must be greater than zero. If the number is greater
2203 than the number of capturing substrings, or if the capture is unset, the
2204 replacement is empty.
2205
2206 Any other character is substituted by itself. E.g: $$ is replaced by a single
2207 dollar or $| replaced by a pipe character.
2208
Alternatively, if string starts with pipe, the remainder is taken as an output
string, same as --output. This is the only form that is supported if
SUPPORT_PCRE2GREP_CALLOUT_FORK is not defined. In this case, --om-separator is
used to separate each callout, defaulting to newline.
2213
2214Example:
2215
2216 echo -e "abcde\n12345" | pcre2grep \
2217 '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2218
2219 Output:
2220
2221 Arg1: [a] [bcd] [d] Arg2: |a| ()
2222 abcde
2223 Arg1: [1] [234] [4] Arg2: |1| ()
2224 12345
2225
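Arguments:
  calloutptr   the callout block
  unused       not used

Returns:       0 in every case where no external command is run (no callout
               string, direct output with a leading pipe, a syntax error, or
               a failure to get memory); otherwise 0 if the result of running
               the command is zero and 1 if it is non-zero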
2230*/
2231
2232static int
2233pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2234{
2235PCRE2_SIZE length = calloutptr->callout_string_length;
2236PCRE2_SPTR string = calloutptr->callout_string;
2237PCRE2_SPTR subject = calloutptr->subject;
2238PCRE2_SIZE *ovector = calloutptr->offset_vector;
2239PCRE2_SIZE capture_top = calloutptr->capture_top;
2240
2241#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2242PCRE2_SIZE argsvectorlen = 2;
2243PCRE2_SIZE argslen = 1;
2244char *args;
2245char *argsptr;
2246char **argsvector;
2247char **argsvectorptr;
2248#ifndef WIN32
2249pid_t pid;
2250#endif
2251int result = 0;
2252#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2253
2254(void)unused; /* Avoid compiler warning */
2255
2256/* Only callouts with strings are supported. */
2257
2258if (string == NULL || length == 0) return 0;
2259
2260/* If there's no command, output the remainder directly. */
2261
2262if (*string == '|')
2263 {
2264 string++;
2265 if (!syntax_check_output_text(string, TRUE)) return 0;
2266 (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2267 return 0;
2268 }
2269
2270#ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2271return 0;
2272#else
2273
2274/* Checking syntax and compute the number of string fragments. Callout strings
2275are silently ignored in the event of a syntax error. */
2276
2277while (length > 0)
2278 {
2279 if (*string == '|')
2280 {
2281 argsvectorlen++;
2282 if (argsvectorlen > 10000) return 0; /* Too many args */
2283 }
2284
2285 else if (*string == '$')
2286 {
2287 uint32_t value;
2288 PCRE2_SPTR begin = string;
2289
2290 switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2291 {
2292 case DDE_CAPTURE:
2293 if (value < capture_top)
2294 {
2295 value *= 2;
2296 argslen += ovector[value + 1] - ovector[value];
2297 }
2298 argslen--; /* Negate the effect of argslen++ below. */
2299 break;
2300
2301 case DDE_CHAR:
2302 if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
2303 else if (utf && value > 127) argslen += ord2utf8(value) - 1;
2304 break;
2305
2306 default: /* Should not occur */
2307 case DDE_ERROR:
2308 return 0;
2309 }
2310
2311 length -= (string - begin);
2312 }
2313
2314 string++;
2315 length--;
2316 argslen++;
2317 }
2318
2319/* Get memory for the argument vector and its strings. */
2320
2321args = (char*)malloc(argslen);
2322if (args == NULL) return 0;
2323
2324argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2325if (argsvector == NULL)
2326 {
2327 free(args);
2328 return 0;
2329 }
2330
2331/* Now reprocess the string and set up the arguments. */
2332
2333argsptr = args;
2334argsvectorptr = argsvector;
2335*argsvectorptr++ = argsptr;
2336
2337length = calloutptr->callout_string_length;
2338string = calloutptr->callout_string;
2339
2340while (length > 0)
2341 {
2342 if (*string == '|')
2343 {
2344 *argsptr++ = '\0';
2345 *argsvectorptr++ = argsptr;
2346 }
2347
2348 else if (*string == '$')
2349 {
2350 uint32_t value;
2351 PCRE2_SPTR begin = string;
2352
2353 switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2354 {
2355 case DDE_CAPTURE:
2356 if (value < capture_top)
2357 {
2358 PCRE2_SIZE capturesize;
2359 value *= 2;
2360 capturesize = ovector[value + 1] - ovector[value];
2361 memcpy(argsptr, subject + ovector[value], capturesize);
2362 argsptr += capturesize;
2363 }
2364 break;
2365
2366 case DDE_CHAR:
2367 if (value == STDOUT_NL_CODE)
2368 {
2369 memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
2370 argsptr += STDOUT_NL_LEN;
2371 }
2372 else if (utf && value > 127)
2373 {
2374 int n = ord2utf8(value);
2375 memcpy(argsptr, utf8_buffer, n);
2376 argsptr += n;
2377 }
2378 else
2379 {
2380 *argsptr++ = value;
2381 }
2382 break;
2383
2384 default: /* Even though this should not occur, the string having */
2385 case DDE_ERROR: /* been checked above, we need to include the free() */
2386 free(args); /* calls so that source checkers do not complain. */
2387 free(argsvector);
2388 return 0;
2389 }
2390
2391 length -= (string - begin);
2392 }
2393
2394 else *argsptr++ = *string;
2395
2396 /* Advance along the string */
2397
2398 string++;
2399 length--;
2400 }
2401
2402*argsptr++ = '\0';
2403*argsvectorptr = NULL;
2404
2405/* Running an external command is system-dependent. Handle Windows and VMS as
2406necessary, otherwise assume fork(). */
2407
2408#ifdef WIN32
2409result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2410
2411#elif defined __VMS
2412 {
2413 char cmdbuf[500];
2414 short i = 0;
2415 int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2416 $DESCRIPTOR(cmd, cmdbuf);
2417
2418 cmdbuf[0] = 0;
2419 while (argsvector[i])
2420 {
2421 strcat(cmdbuf, argsvector[i]);
2422 strcat(cmdbuf, " ");
2423 i++;
2424 }
2425 cmd.dsc$w_length = strlen(cmdbuf) - 1;
2426 status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2427 if (!(status & 1)) result = 0;
2428 else result = retstat & 1 ? 0 : 1;
2429 }
2430
2431#else /* Neither Windows nor VMS */
2432pid = fork();
2433if (pid == 0)
2434 {
2435 (void)execv(argsvector[0], argsvector);
2436 /* Control gets here if there is an error, e.g. a non-existent program */
2437 exit(1);
2438 }
2439else if (pid > 0)
2440 (void)waitpid(pid, &result, 0);
2441#endif /* End Windows/VMS/other handling */
2442
2443free(args);
2444free(argsvector);
2445
2446/* Currently negative return values are not supported, only zero (match
2447continues) or non-zero (match fails). */
2448
2449return result != 0;
2450#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2451}
2452#endif /* SUPPORT_PCRE2GREP_CALLOUT */
2453
2454
2455
2456/*************************************************
2457* Read a portion of the file into buffer *
2458*************************************************/
2459
2460static int
2461fill_buffer(void *handle, int frtype, char *buffer, int length,
2462 BOOL input_line_buffered)
2463{
2464(void)frtype; /* Avoid warning when not used */
2465
2466#ifdef SUPPORT_LIBZ
2467if (frtype == FR_LIBZ)
2468 return gzread((gzFile)handle, buffer, length);
2469else
2470#endif
2471
2472#ifdef SUPPORT_LIBBZ2
2473if (frtype == FR_LIBBZ2)
2474 return BZ2_bzread((BZFILE *)handle, buffer, length);
2475else
2476#endif
2477
2478return (input_line_buffered ?
2479 read_one_line(buffer, length, (FILE *)handle) :
2480 fread(buffer, 1, length, (FILE *)handle));
2481}
2482
2483
2484
2485/*************************************************
2486* Grep an individual file *
2487*************************************************/
2488
2489/* This is called from grep_or_recurse() below. It uses a buffer that is three
2490times the value of bufthird. The matching point is never allowed to stray into
2491the top third of the buffer, thus keeping more of the file available for
2492context printing or for multiline scanning. For large files, the pointer will
2493be in the middle third most of the time, so the bottom third is available for
2494"before" context printing.
2495
2496Arguments:
2497 handle the fopened FILE stream for a normal file
2498 the gzFile pointer when reading is via libz
2499 the BZFILE pointer when reading is via libbz2
2500 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2501 filename the file name or NULL (for errors)
2502 printname the file name if it is to be printed for each match
2503 or NULL if the file name is not to be printed
2504 it cannot be NULL if filenames[_nomatch]_only is set
2505
2506Returns: 0 if there was at least one match
2507 1 otherwise (no matches)
2508 2 if an overlong line is encountered
2509 3 if there is a read error on a .bz2 file
2510*/
2511
/* Grep one already-opened input. The data is read into main_buffer with
fill_buffer() and examined a line at a time with match_patterns(); what is
then written to stdout depends on the option variables tested below (quiet,
count_only, filenames, only_matching_count, the context settings, and so on).
rc starts as 1 and is set to 0 once a wanted line has been dealt with. */

2512static int
2513pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2514{
2515int rc = 1;
2516int filepos = 0;
2517unsigned long int linenumber = 1;
2518unsigned long int lastmatchnumber = 0;
2519unsigned long int count = 0;
2520long int count_matched_lines = 0;
2521char *lastmatchrestart = main_buffer;
2522char *ptr = main_buffer;
2523char *endptr;
2524PCRE2_SIZE bufflength;
2525BOOL binary = FALSE;
2526BOOL endhyphenpending = FALSE;
2527BOOL lines_printed = FALSE;
2528BOOL input_line_buffered = line_buffered;
2529FILE *in = NULL; /* Ensure initialized */
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002530long stream_start = -1; /* Only non-negative if relevant */
Elliott Hughes5b808042021-10-01 10:56:10 -07002531
2532/* Do the first read into the start of the buffer and set up the pointer to end
2533of what we have. In the case of libz, a non-zipped .gz file will be read as a
2534plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2535fail. */
2536
2537if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2538 {
2539 in = (FILE *)handle;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002540 if (feof(in))
2541 return 1;
2542 if (is_file_tty(in))
2543 input_line_buffered = TRUE;
2544 else
2545 {
2546 if (count_limit >= 0 && filename == stdin_name)
2547 stream_start = ftell(in);
2548 }
Elliott Hughes5b808042021-10-01 10:56:10 -07002549 }
2550else input_line_buffered = FALSE;
2551
2552bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2553 input_line_buffered);
2554
/* NOTE(review): the description that precedes this function lists 3 as the
result for a .bz2 read error, but the value returned here is 2. Confirm which
of the two the caller relies on. */
2555#ifdef SUPPORT_LIBBZ2
2556if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE */
2557#endif
2558
2559endptr = main_buffer + bufflength;
2560
2561/* Unless binary-files=text, see if we have a binary file. This uses the same
2562rule as GNU grep, namely, a search for a binary zero byte near the start of the
2563file. However, when the newline convention is binary zero, we can't do this. */
2564
2565if (binary_files != BIN_TEXT)
2566 {
2567 if (endlinetype != PCRE2_NEWLINE_NUL)
2568 binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2569 != NULL;
2570 if (binary && binary_files == BIN_NOMATCH) return 1;
2571 }
2572
2573/* Loop while the current pointer is not at the end of the file. For large
2574files, endptr will be at the end of the buffer when we are in the middle of the
2575file, but ptr will never get there, because as soon as it gets over 2/3 of the
2576way, the buffer is shifted left and re-filled. */
2577
2578while (ptr < endptr)
2579 {
2580 int endlinelength;
2581 int mrc = 0;
2582 unsigned int options = 0;
2583 BOOL match;
2584 BOOL line_matched = FALSE;
2585 char *t = ptr;
2586 PCRE2_SIZE length, linelength;
2587 PCRE2_SIZE startoffset = 0;
2588
2589 /* If the -m option set a limit for the number of matched or non-matched
2590 lines, check it here. A limit of zero means that no matching is ever done.
2591 For stdin from a file, set the file position. */
2592
2593 if (count_limit >= 0 && count_matched_lines >= count_limit)
2594 {
 /* stream_start is only ever set in the plain-FILE branch near the top, so
 handle is a FILE stream whenever this fseek() is reached. */
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002595 if (stream_start >= 0)
2596 (void)fseek(handle, stream_start + (long int)filepos, SEEK_SET);
Elliott Hughes5b808042021-10-01 10:56:10 -07002597 rc = (count_limit == 0)? 1 : 0;
2598 break;
2599 }
2600
2601 /* At this point, ptr is at the start of a line. We need to find the length
2602 of the subject string to pass to pcre2_match(). In multiline mode, it is the
2603 length remainder of the data in the buffer. Otherwise, it is the length of
2604 the next line, excluding the terminating newline. After matching, we always
2605 advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2606 option is used for compiling, so that any match is constrained to be in the
2607 first line. */
2608
2609 t = end_of_line(t, endptr, &endlinelength);
2610 linelength = t - ptr - endlinelength;
2611 length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2612
2613 /* Check to see if the line we are looking at extends right to the very end
2614 of the buffer without a line terminator. This means the line is too long to
2615 handle at the current buffer size. Until the buffer reaches its maximum size,
2616 try doubling it and reading more data. */
2617
2618 if (endlinelength == 0 && t == main_buffer + bufsize)
2619 {
2620 if (bufthird < max_bufthird)
2621 {
2622 char *new_buffer;
2623 int new_bufthird = 2*bufthird;
2624
2625 if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2626 new_buffer = (char *)malloc(3*new_bufthird);
2627
2628 if (new_buffer == NULL)
2629 {
2630 fprintf(stderr,
2631 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2632 "pcre2grep: not enough memory to increase the buffer size to %d\n",
2633 linenumber,
2634 (filename == NULL)? "" : " of file ",
2635 (filename == NULL)? "" : filename,
2636 new_bufthird);
2637 return 2;
2638 }
2639
2640 /* Copy the data and adjust pointers to the new buffer location. */
2641
2642 memcpy(new_buffer, main_buffer, bufsize);
2643 bufthird = new_bufthird;
2644 bufsize = 3*bufthird;
2645 ptr = new_buffer + (ptr - main_buffer);
2646 lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2647 free(main_buffer);
2648 main_buffer = new_buffer;
2649
2650 /* Read more data into the buffer and then try to find the line ending
2651 again. */
2652
2653 bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2654 bufsize - bufflength, input_line_buffered);
2655 endptr = main_buffer + bufflength;
2656 continue;
2657 }
2658 else
2659 {
2660 fprintf(stderr,
2661 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2662 "pcre2grep: the maximum buffer size is %d\n"
2663 "pcre2grep: use the --max-buffer-size option to change it\n",
2664 linenumber,
2665 (filename == NULL)? "" : " of file ",
2666 (filename == NULL)? "" : filename,
2667 bufthird);
2668 return 2;
2669 }
2670 }
2671
Elliott Hughes5b808042021-10-01 10:56:10 -07002672 /* We come back here after a match when only_matching_count is non-zero, in
2673 order to find any further matches in the same line. This applies to
2674 --only-matching, --file-offsets, and --line-offsets. */
2675
2676 ONLY_MATCHING_RESTART:
2677
2678 /* Run through all the patterns until one matches or there is an error other
2679 than NOMATCH. This code is in a subroutine so that it can be re-used for
2680 finding subsequent matches when colouring matched lines. After finding one
2681 match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2682 this line. */
2683
2684 match = match_patterns(ptr, length, options, startoffset, &mrc);
2685 options = PCRE2_NOTEMPTY;
2686
2687 /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2688 only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2689 return code - to output data lines, so that binary zeroes are treated as just
2690 another data character. */
2691
2692 if (match != invert)
2693 {
2694 BOOL hyphenprinted = FALSE;
2695
2696 /* We've failed if we want a file that doesn't have any matches. */
2697
2698 if (filenames == FN_NOMATCH_ONLY) return 1;
2699
2700 /* Remember that this line matched (for counting matched lines) */
2701
2702 line_matched = TRUE;
2703
2704 /* If all we want is a yes/no answer, we can return immediately. */
2705
2706 if (quiet) return 0;
2707
2708 /* Just count if just counting is wanted. */
2709
2710 else if (count_only || show_total_count) count++;
2711
2712 /* When handling a binary file and binary-files==binary, the "binary"
2713 variable will be set true (it's false in all other cases). In this
2714 situation we just want to output the file name. No need to scan further. */
2715
2716 else if (binary)
2717 {
2718 fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2719 return 0;
2720 }
2721
2722 /* Likewise, if all we want is a file name, there is no need to scan any
2723 more lines in the file. */
2724
2725 else if (filenames == FN_MATCH_ONLY)
2726 {
2727 fprintf(stdout, "%s" STDOUT_NL, printname);
2728 return 0;
2729 }
2730
2731 /* The --only-matching option prints just the substring that matched,
2732 and/or one or more captured portions of it, as long as these strings are
2733 not empty. The --file-offsets and --line-offsets options output offsets for
2734 the matching substring (all three set only_matching_count non-zero). None
2735 of these mutually exclusive options prints any context. Afterwards, adjust
2736 the start and then jump back to look for further matches in the same line.
2737 If we are in invert mode, however, nothing is printed and we do not restart
2738 - this could still be useful because the return code is set. */
2739
2740 else if (only_matching_count != 0)
2741 {
2742 if (!invert)
2743 {
2744 PCRE2_SIZE oldstartoffset;
2745
2746 if (printname != NULL) fprintf(stdout, "%s:", printname);
2747 if (number) fprintf(stdout, "%lu:", linenumber);
2748
2749 /* Handle --line-offsets */
2750
 /* ptr + offsets[0] - ptr is just offsets[0], the offset from ptr. */
2751 if (line_offsets)
2752 fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2753 (int)(offsets[1] - offsets[0]));
2754
2755 /* Handle --file-offsets */
2756
2757 else if (file_offsets)
2758 fprintf(stdout, "%d,%d" STDOUT_NL,
2759 (int)(filepos + ptr + offsets[0] - ptr),
2760 (int)(offsets[1] - offsets[0]));
2761
2762 /* Handle --output (which has already been syntax checked) */
2763
2764 else if (output_text != NULL)
2765 {
2766 if (display_output_text((PCRE2_SPTR)output_text, FALSE,
2767 (PCRE2_SPTR)ptr, offsets, mrc) || printname != NULL ||
2768 number)
2769 fprintf(stdout, STDOUT_NL);
2770 }
2771
2772 /* Handle --only-matching, which may occur many times */
2773
2774 else
2775 {
2776 BOOL printed = FALSE;
2777 omstr *om;
2778
2779 for (om = only_matching; om != NULL; om = om->next)
2780 {
2781 int n = om->groupnum;
2782 if (n == 0 || n < mrc)
2783 {
2784 int plen = offsets[2*n + 1] - offsets[2*n];
2785 if (plen > 0)
2786 {
2787 if (printed && om_separator != NULL)
2788 fprintf(stdout, "%s", om_separator);
2789 print_match(ptr + offsets[n*2], plen);
2790 printed = TRUE;
2791 }
2792 }
2793 }
2794
2795 if (printed || printname != NULL || number)
2796 fprintf(stdout, STDOUT_NL);
2797 }
2798
2799 /* Prepare to repeat to find the next match in the line. */
2800
2801 match = FALSE;
2802 if (line_buffered) fflush(stdout);
2803 rc = 0; /* Had some success */
2804
2805 /* If the pattern contained a lookbehind that included \K, it is
2806 possible that the end of the match might be at or before the actual
2807 starting offset we have just used. In this case, start one character
2808 further on. */
2809
2810 startoffset = offsets[1]; /* Restart after the match */
2811 oldstartoffset = pcre2_get_startchar(match_data);
2812 if (startoffset <= oldstartoffset)
2813 {
2814 if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
2815 startoffset = oldstartoffset + 1;
2816 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2817 }
2818
2819 /* If the current match ended past the end of the line (only possible
2820 in multiline mode), we must move on to the line in which it did end
2821 before searching for more matches. */
2822
2823 while (startoffset > linelength)
2824 {
2825 ptr += linelength + endlinelength;
2826 filepos += (int)(linelength + endlinelength);
2827 linenumber++;
2828 startoffset -= (int)(linelength + endlinelength);
2829 t = end_of_line(ptr, endptr, &endlinelength);
2830 linelength = t - ptr - endlinelength;
2831 length = (PCRE2_SIZE)(endptr - ptr);
2832 }
2833
2834 goto ONLY_MATCHING_RESTART;
2835 }
2836 }
2837
2838 /* This is the default case when none of the above options is set. We print
2839 the matching lines(s), possibly preceded and/or followed by other lines of
2840 context. */
2841
2842 else
2843 {
2844 lines_printed = TRUE;
2845
2846 /* See if there is a requirement to print some "after" lines from a
2847 previous match. We never print any overlaps. */
2848
2849 if (after_context > 0 && lastmatchnumber > 0)
2850 {
2851 int ellength;
2852 int linecount = 0;
2853 char *p = lastmatchrestart;
2854
2855 while (p < ptr && linecount < after_context)
2856 {
2857 p = end_of_line(p, ptr, &ellength);
2858 linecount++;
2859 }
2860
2861 /* It is important to advance lastmatchrestart during this printing so
2862 that it interacts correctly with any "before" printing below. Print
2863 each line's data using fwrite() in case there are binary zeroes. */
2864
2865 while (lastmatchrestart < p)
2866 {
2867 char *pp = lastmatchrestart;
2868 if (printname != NULL) fprintf(stdout, "%s-", printname);
2869 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2870 pp = end_of_line(pp, endptr, &ellength);
2871 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2872 lastmatchrestart = pp;
2873 }
2874 if (lastmatchrestart != ptr) hyphenpending = TRUE;
2875 }
2876
2877 /* If there were non-contiguous lines printed above, insert hyphens. */
2878
2879 if (hyphenpending)
2880 {
2881 fprintf(stdout, "--" STDOUT_NL);
2882 hyphenpending = FALSE;
2883 hyphenprinted = TRUE;
2884 }
2885
2886 /* See if there is a requirement to print some "before" lines for this
2887 match. Again, don't print overlaps. */
2888
2889 if (before_context > 0)
2890 {
2891 int linecount = 0;
2892 char *p = ptr;
2893
2894 while (p > main_buffer &&
2895 (lastmatchnumber == 0 || p > lastmatchrestart) &&
2896 linecount < before_context)
2897 {
2898 linecount++;
2899 p = previous_line(p, main_buffer);
2900 }
2901
2902 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2903 fprintf(stdout, "--" STDOUT_NL);
2904
2905 while (p < ptr)
2906 {
2907 int ellength;
2908 char *pp = p;
2909 if (printname != NULL) fprintf(stdout, "%s-", printname);
2910 if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2911 pp = end_of_line(pp, endptr, &ellength);
2912 FWRITE_IGNORE(p, 1, pp - p, stdout);
2913 p = pp;
2914 }
2915 }
2916
2917 /* Now print the matching line(s); ensure we set hyphenpending at the end
2918 of the file if any context lines are being output. */
2919
2920 if (after_context > 0 || before_context > 0)
2921 endhyphenpending = TRUE;
2922
2923 if (printname != NULL) fprintf(stdout, "%s:", printname);
2924 if (number) fprintf(stdout, "%lu:", linenumber);
2925
Elliott Hughes5b808042021-10-01 10:56:10 -07002926 /* In multiline mode, or if colouring, we have to split the line(s) up
2927 and search for further matches, but not of course if the line is a
2928 non-match. In multiline mode this is necessary in case there is another
2929 match that spans the end of the current line. When colouring we want to
2930 colour all matches. */
2931
2932 if ((multiline || do_colour) && !invert)
2933 {
2934 int plength;
2935 PCRE2_SIZE endprevious;
2936
2937 /* The use of \K may make the end offset earlier than the start. In
2938 this situation, swap them round. */
2939
2940 if (offsets[0] > offsets[1])
2941 {
2942 PCRE2_SIZE temp = offsets[0];
2943 offsets[0] = offsets[1];
2944 offsets[1] = temp;
2945 }
2946
2947 FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
2948 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
2949
2950 for (;;)
2951 {
2952 PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
2953
2954 endprevious = offsets[1];
2955 startoffset = endprevious; /* Advance after previous match. */
2956
2957 /* If the pattern contained a lookbehind that included \K, it is
2958 possible that the end of the match might be at or before the actual
2959 starting offset we have just used. In this case, start one character
2960 further on. */
2961
2962 if (startoffset <= oldstartoffset)
2963 {
2964 startoffset = oldstartoffset + 1;
2965 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2966 }
2967
2968 /* If the current match ended past the end of the line (only possible
2969 in multiline mode), we must move on to the line in which it did end
2970 before searching for more matches. Because the PCRE2_FIRSTLINE option
2971 is set, the start of the match will always be before the first
2972 newline sequence. */
2973
2974 while (startoffset > linelength + endlinelength)
2975 {
2976 ptr += linelength + endlinelength;
2977 filepos += (int)(linelength + endlinelength);
2978 linenumber++;
2979 startoffset -= (int)(linelength + endlinelength);
2980 endprevious -= (int)(linelength + endlinelength);
2981 t = end_of_line(ptr, endptr, &endlinelength);
2982 linelength = t - ptr - endlinelength;
2983 length = (PCRE2_SIZE)(endptr - ptr);
2984 }
2985
2986 /* If startoffset is at the exact end of the line it means this
2987 complete line was the final part of the match, so there is nothing
2988 more to do. */
2989
2990 if (startoffset == linelength + endlinelength) break;
2991
2992 /* Otherwise, run a match from within the final line, and if found,
2993 loop for any that may follow. */
2994
2995 if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
2996
2997 /* The use of \K may make the end offset earlier than the start. In
2998 this situation, swap them round. */
2999
3000 if (offsets[0] > offsets[1])
3001 {
3002 PCRE2_SIZE temp = offsets[0];
3003 offsets[0] = offsets[1];
3004 offsets[1] = temp;
3005 }
3006
3007 FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
3008 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3009 }
3010
3011 /* In multiline mode, we may have already printed the complete line
3012 and its line-ending characters (if they matched the pattern), so there
3013 may be no more to print. */
3014
3015 plength = (int)((linelength + endlinelength) - endprevious);
3016 if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
3017 }
3018
3019 /* Not colouring or multiline; no need to search for further matches. */
3020
3021 else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
3022 }
3023
3024 /* End of doing what has to be done for a match. If --line-buffered was
3025 given, flush the output. */
3026
3027 if (line_buffered) fflush(stdout);
3028 rc = 0; /* Had some success */
3029
3030 /* Remember where the last match happened for after_context. We remember
3031 where we are about to restart, and that line's number. */
3032
3033 lastmatchrestart = ptr + linelength + endlinelength;
3034 lastmatchnumber = linenumber + 1;
3035
3036 /* If a line was printed and we are now at the end of the file and the last
3037 line had no newline, output one. */
3038
3039 if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
3040 write_final_newline();
3041 }
3042
3043 /* For a match in multiline inverted mode (which of course did not cause
3044 anything to be printed), we have to move on to the end of the match before
3045 proceeding. */
3046
3047 if (multiline && invert && match)
3048 {
3049 int ellength;
3050 char *endmatch = ptr + offsets[1];
3051 t = ptr;
3052 while (t < endmatch)
3053 {
3054 t = end_of_line(t, endptr, &ellength);
3055 if (t <= endmatch) linenumber++; else break;
3056 }
3057 endmatch = end_of_line(endmatch, endptr, &ellength);
3058 linelength = endmatch - ptr - ellength;
3059 }
3060
3061 /* Advance to after the newline and increment the line number. The file
3062 offset to the current line is maintained in filepos. */
3063
3064 END_ONE_MATCH:
3065 ptr += linelength + endlinelength;
3066 filepos += (int)(linelength + endlinelength);
3067 linenumber++;
3068
3069 /* If there was at least one match (or a non-match, as required) in the line,
3070 increment the count for the -m option. */
3071
3072 if (line_matched) count_matched_lines++;
3073
3074 /* If input is line buffered, and the buffer is not yet full, read another
3075 line and add it into the buffer. */
3076
 /* input_line_buffered is forced to FALSE for libz and libbz2 handles, so "in"
 has been set from handle whenever this read is done. */
3077 if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
3078 {
3079 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
3080 bufflength += add;
3081 endptr += add;
3082 }
3083
3084 /* If we haven't yet reached the end of the file (the buffer is full), and
3085 the current point is in the top 1/3 of the buffer, slide the buffer down by
3086 1/3 and refill it. Before we do this, if some unprinted "after" lines are
3087 about to be lost, print them. */
3088
3089 if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3090 {
3091 if (after_context > 0 &&
3092 lastmatchnumber > 0 &&
3093 lastmatchrestart < main_buffer + bufthird)
3094 {
3095 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3096 lastmatchnumber = 0; /* Indicates no after lines pending */
3097 }
3098
3099 /* Now do the shuffle */
3100
3101 (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3102 ptr -= bufthird;
3103
3104 bufflength = 2*bufthird + fill_buffer(handle, frtype,
3105 main_buffer + 2*bufthird, bufthird, input_line_buffered);
3106 endptr = main_buffer + bufflength;
3107
3108 /* Adjust any last match point */
3109
3110 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3111 }
3112 } /* Loop through the whole file */
3113
3114/* End of file; print final "after" lines if wanted; do_after_lines sets
3115hyphenpending if it prints something. */
3116
3117if (only_matching_count == 0 && !(count_only|show_total_count))
3118 {
3119 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3120 hyphenpending |= endhyphenpending;
3121 }
3122
3123/* Print the file name if we are looking for those without matches and there
3124were none. If we found a match, we won't have got this far. */
3125
3126if (filenames == FN_NOMATCH_ONLY)
3127 {
3128 fprintf(stdout, "%s" STDOUT_NL, printname);
3129 return 0;
3130 }
3131
3132/* Print the match count if wanted */
3133
3134if (count_only && !quiet)
3135 {
3136 if (count > 0 || !omit_zero_count)
3137 {
3138 if (printname != NULL && filenames != FN_NONE)
3139 fprintf(stdout, "%s:", printname);
3140 fprintf(stdout, "%lu" STDOUT_NL, count);
3141 counts_printed++;
3142 }
3143 }
3144
3145total_count += count; /* Can be set without count_only */
3146return rc;
3147}
3148
3149
3150
3151/*************************************************
3152* Grep a file or recurse into a directory *
3153*************************************************/
3154
3155/* Given a path name, if it's a directory, scan all the files if we are
3156recursing; if it's a file, grep it.
3157
3158Arguments:
3159 pathname the path to investigate
3160 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
3161 only_one_at_top TRUE if the path is the only one at toplevel
3162
3163Returns: -1 the file/directory was skipped
3164 0 if there was at least one match
3165 1 if there were no matches
3166 2 there was some kind of error
3167
3168However, file opening failures are suppressed if "silent" is set.
3169*/
3170
3171static int
3172grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3173{
3174int rc = 1;
3175int frtype;
3176void *handle;
3177char *lastcomp;
3178FILE *in = NULL; /* Ensure initialized */
3179
3180#ifdef SUPPORT_LIBZ
3181gzFile ingz = NULL;
3182#endif
3183
3184#ifdef SUPPORT_LIBBZ2
3185BZFILE *inbz2 = NULL;
3186#endif
3187
3188#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3189int pathlen;
3190#endif
3191
3192#if defined NATIVE_ZOS
3193int zos_type;
3194FILE *zos_test_file;
3195#endif
3196
3197/* If the file name is "-" we scan stdin */
3198
3199if (strcmp(pathname, "-") == 0)
3200 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07003201 if (count_limit >= 0) setbuf(stdin, NULL);
Elliott Hughes5b808042021-10-01 10:56:10 -07003202 return pcre2grep(stdin, FR_PLAIN, stdin_name,
3203 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3204 stdin_name : NULL);
3205 }
3206
3207/* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3208directories, whereas --include and --exclude apply to everything else. The test
3209is against the final component of the path. */
3210
3211lastcomp = strrchr(pathname, FILESEP);
3212lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3213
3214/* If the file is a directory, skip if not recursing or if explicitly excluded.
3215Otherwise, scan the directory and recurse for each path within it. The scanning
3216code is localized so it can be made system-specific. */
3217
3218
3219/* For z/OS, determine the file type. */
3220
3221#if defined NATIVE_ZOS
3222zos_test_file = fopen(pathname,"rb");
3223
3224if (zos_test_file == NULL)
3225 {
3226 if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3227 pathname, strerror(errno));
3228 return -1;
3229 }
3230zos_type = identifyzosfiletype (zos_test_file);
3231fclose (zos_test_file);
3232
3233/* Handle a PDS in separate code */
3234
3235if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3236 {
3237 return travelonpdsdir (pathname, only_one_at_top);
3238 }
3239
3240/* Deal with regular files in the normal way below. These types are:
3241 zos_type == __ZOS_PDS_MEMBER
3242 zos_type == __ZOS_PS
3243 zos_type == __ZOS_VSAM_KSDS
3244 zos_type == __ZOS_VSAM_ESDS
3245 zos_type == __ZOS_VSAM_RRDS
3246*/
3247
3248/* Handle a z/OS directory using common code. */
3249
3250else if (zos_type == __ZOS_HFS)
3251 {
3252#endif /* NATIVE_ZOS */
3253
3254
3255/* Handle directories: common code for all OS */
3256
3257if (isdirectory(pathname))
3258 {
3259 if (dee_action == dee_SKIP ||
3260 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3261 return -1;
3262
3263 if (dee_action == dee_RECURSE)
3264 {
3265 char childpath[FNBUFSIZ];
3266 char *nextfile;
3267 directory_type *dir = opendirectory(pathname);
3268
3269 if (dir == NULL)
3270 {
3271 if (!silent)
3272 fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3273 strerror(errno));
3274 return 2;
3275 }
3276
3277 while ((nextfile = readdirectory(dir)) != NULL)
3278 {
3279 int frc;
3280 int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3281 if (fnlength > FNBUFSIZ)
3282 {
3283 fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3284 rc = 2;
3285 break;
3286 }
3287 sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile);
3288
3289 /* If the realpath() function is available, we can try to prevent endless
3290 recursion caused by a symlink pointing to a parent directory (GitHub
3291 issue #2 (old Bugzilla #2794). Original patch from Thomas Tempelmann.
3292 Modified to avoid using strlcat() because that isn't a standard C
3293 function, and also modified not to copy back the fully resolved path,
3294 because that affects the output from pcre2grep. */
3295
3296#ifdef HAVE_REALPATH
Elliott Hughes16619d62021-10-29 12:10:38 -07003297 {
Elliott Hughes5b808042021-10-01 10:56:10 -07003298 char resolvedpath[PATH_MAX];
Elliott Hughes16619d62021-10-29 12:10:38 -07003299 BOOL isSame;
3300 size_t rlen;
Elliott Hughes5b808042021-10-01 10:56:10 -07003301 if (realpath(childpath, resolvedpath) == NULL)
3302 continue; /* This path is invalid - we can skip processing this */
Elliott Hughes16619d62021-10-29 12:10:38 -07003303 isSame = strcmp(pathname, resolvedpath) == 0;
Elliott Hughes5b808042021-10-01 10:56:10 -07003304 if (isSame) continue; /* We have a recursion */
Elliott Hughes16619d62021-10-29 12:10:38 -07003305 rlen = strlen(resolvedpath);
Elliott Hughes5b808042021-10-01 10:56:10 -07003306 if (rlen++ < sizeof(resolvedpath) - 3)
3307 {
Elliott Hughes16619d62021-10-29 12:10:38 -07003308 BOOL contained;
Elliott Hughes5b808042021-10-01 10:56:10 -07003309 strcat(resolvedpath, "/");
Elliott Hughes16619d62021-10-29 12:10:38 -07003310 contained = strncmp(pathname, resolvedpath, rlen) == 0;
Elliott Hughes5b808042021-10-01 10:56:10 -07003311 if (contained) continue; /* We have a recursion */
3312 }
Elliott Hughes16619d62021-10-29 12:10:38 -07003313 }
Elliott Hughes5b808042021-10-01 10:56:10 -07003314#endif /* HAVE_REALPATH */
3315
3316 frc = grep_or_recurse(childpath, dir_recurse, FALSE);
3317 if (frc > 1) rc = frc;
3318 else if (frc == 0 && rc == 1) rc = 0;
3319 }
3320
3321 closedirectory(dir);
3322 return rc;
3323 }
3324 }
3325
3326#ifdef WIN32
3327if (iswild(pathname))
3328 {
3329 char buffer[1024];
3330 char *nextfile;
3331 char *name;
3332 directory_type *dir = opendirectory(pathname);
3333
3334 if (dir == NULL)
3335 return 0;
3336
3337 for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3338 if (*nextfile == '/' || *nextfile == '\\')
3339 name = nextfile + 1;
3340 *name = 0;
3341
3342 while ((nextfile = readdirectory(dir)) != NULL)
3343 {
3344 int frc;
3345 sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3346 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3347 if (frc > 1) rc = frc;
3348 else if (frc == 0 && rc == 1) rc = 0;
3349 }
3350
3351 closedirectory(dir);
3352 return rc;
3353 }
3354#endif
3355
3356#if defined NATIVE_ZOS
3357 }
3358#endif
3359
3360/* If the file is not a directory, check for a regular file, and if it is not,
3361skip it if that's been requested. Otherwise, check for an explicit inclusion or
3362exclusion. */
3363
3364else if (
3365#if defined NATIVE_ZOS
3366 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3367#else /* all other OS */
3368 (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3369#endif
3370 !test_incexc(lastcomp, include_patterns, exclude_patterns))
3371 return -1; /* File skipped */
3372
3373/* Control reaches here if we have a regular file, or if we have a directory
3374and recursion or skipping was not requested, or if we have anything else and
3375skipping was not requested. The scan proceeds. If this is the first and only
3376argument at top level, we don't show the file name, unless we are only showing
3377the file name, or the filename was forced (-H). */
3378
3379#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3380pathlen = (int)(strlen(pathname));
3381#endif
3382
3383/* Open using zlib if it is supported and the file name ends with .gz. */
3384
3385#ifdef SUPPORT_LIBZ
3386if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3387 {
3388 ingz = gzopen(pathname, "rb");
3389 if (ingz == NULL)
3390 {
3391 if (!silent)
3392 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3393 strerror(errno));
3394 return 2;
3395 }
3396 handle = (void *)ingz;
3397 frtype = FR_LIBZ;
3398 }
3399else
3400#endif
3401
3402/* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3403
3404#ifdef SUPPORT_LIBBZ2
3405if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3406 {
3407 inbz2 = BZ2_bzopen(pathname, "rb");
3408 handle = (void *)inbz2;
3409 frtype = FR_LIBBZ2;
3410 }
3411else
3412#endif
3413
3414/* Otherwise use plain fopen(). The label is so that we can come back here if
3415an attempt to read a .bz2 file indicates that it really is a plain file. */
3416
3417#ifdef SUPPORT_LIBBZ2
3418PLAIN_FILE:
3419#endif
3420 {
3421 in = fopen(pathname, "rb");
3422 handle = (void *)in;
3423 frtype = FR_PLAIN;
3424 }
3425
3426/* All the opening methods return errno when they fail. */
3427
3428if (handle == NULL)
3429 {
3430 if (!silent)
3431 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3432 strerror(errno));
3433 return 2;
3434 }
3435
3436/* Now grep the file */
3437
3438rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3439 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3440
3441/* Close in an appropriate manner. */
3442
3443#ifdef SUPPORT_LIBZ
3444if (frtype == FR_LIBZ)
3445 gzclose(ingz);
3446else
3447#endif
3448
3449/* If it is a .bz2 file and the result is 3, it means that the first attempt to
3450read failed. If the error indicates that the file isn't in fact bzipped, try
3451again as a normal file. */
3452
3453#ifdef SUPPORT_LIBBZ2
3454if (frtype == FR_LIBBZ2)
3455 {
3456 if (rc == 3)
3457 {
3458 int errnum;
3459 const char *err = BZ2_bzerror(inbz2, &errnum);
3460 if (errnum == BZ_DATA_ERROR_MAGIC)
3461 {
3462 BZ2_bzclose(inbz2);
3463 goto PLAIN_FILE;
3464 }
3465 else if (!silent)
3466 fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3467 pathname, err);
3468 rc = 2; /* The normal "something went wrong" code */
3469 }
3470 BZ2_bzclose(inbz2);
3471 }
3472else
3473#endif
3474
3475/* Normal file close */
3476
3477fclose(in);
3478
3479/* Pass back the yield from pcre2grep(). */
3480
3481return rc;
3482}
3483
3484
3485
3486/*************************************************
3487* Handle a no-data option *
3488*************************************************/
3489
3490static int
3491handle_option(int letter, int options)
3492{
3493switch(letter)
3494 {
3495 case N_FOFFSETS: file_offsets = TRUE; break;
3496 case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3497 case N_LBUFFER: line_buffered = TRUE; break;
3498 case N_LOFFSETS: line_offsets = number = TRUE; break;
3499 case N_NOJIT: use_jit = FALSE; break;
3500 case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break;
3501 case 'a': binary_files = BIN_TEXT; break;
3502 case 'c': count_only = TRUE; break;
3503 case 'F': options |= PCRE2_LITERAL; break;
3504 case 'H': filenames = FN_FORCE; break;
3505 case 'I': binary_files = BIN_NOMATCH; break;
3506 case 'h': filenames = FN_NONE; break;
3507 case 'i': options |= PCRE2_CASELESS; break;
3508 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3509 case 'L': filenames = FN_NOMATCH_ONLY; break;
3510 case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3511 case 'n': number = TRUE; break;
3512
3513 case 'o':
3514 only_matching_last = add_number(0, only_matching_last);
3515 if (only_matching == NULL) only_matching = only_matching_last;
3516 break;
3517
3518 case 'q': quiet = TRUE; break;
3519 case 'r': dee_action = dee_RECURSE; break;
3520 case 's': silent = TRUE; break;
3521 case 't': show_total_count = TRUE; break;
3522 case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3523 case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
3524 case 'v': invert = TRUE; break;
3525 case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3526 case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3527
3528 case 'V':
3529 {
3530 unsigned char buffer[128];
3531 (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3532 fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3533 }
3534 pcre2grep_exit(0);
3535 break;
3536
3537 default:
3538 fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3539 pcre2grep_exit(usage(2));
3540 }
3541
3542return options;
3543}
3544
3545
3546
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. The suffix depends on the last
digit, except that a number whose last two digits are 11, 12, or 13 takes "th".

Argument:   the number
Returns:    pointer to a static buffer holding the text; each call overwrites
              the result of the previous one
*/

static char *
ordin(int n)
{
/* Big enough for the longest 32-bit int (11 characters with the sign), a
two-letter suffix, and the terminating zero. */

static char buffer[14];
const char *suffix = "th";
int lasttwo = n % 100;

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (lasttwo % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
3571
3572
3573
3574/*************************************************
3575* Compile a single pattern *
3576*************************************************/
3577
3578/* Do nothing if the pattern has already been compiled. This is the case for
3579include/exclude patterns read from a file.
3580
3581When the -F option has been used, each "pattern" may be a list of strings,
3582separated by line breaks. They will be matched literally. We split such a
3583string and compile the first substring, inserting an additional block into the
3584pattern chain.
3585
3586Arguments:
3587 p points to the pattern block
3588 options the PCRE options
3589 fromfile TRUE if the pattern was read from a file
3590 fromtext file name or identifying text (e.g. "include")
3591 count 0 if this is the only command line pattern, or
3592 number of the command line pattern, or
3593 linenumber for a pattern from a file
3594
3595Returns: TRUE on success, FALSE after an error
3596*/
3597
3598static BOOL
3599compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3600 int count)
3601{
3602char *ps;
3603int errcode;
3604PCRE2_SIZE patlen, erroffset;
3605PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3606
3607if (p->compiled != NULL) return TRUE;
3608ps = p->string;
3609patlen = p->length;
3610
3611if ((options & PCRE2_LITERAL) != 0)
3612 {
3613 int ellength;
3614 char *eop = ps + patlen;
3615 char *pe = end_of_line(ps, eop, &ellength);
3616
3617 if (ellength != 0)
3618 {
3619 patlen = pe - ps - ellength;
3620 if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3621 }
3622 }
3623
3624p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3625 &erroffset, compile_context);
3626
3627/* Handle successful compile. Try JIT-compiling if supported and enabled. We
3628ignore any JIT compiler errors, relying falling back to interpreting if
3629anything goes wrong with JIT. */
3630
3631if (p->compiled != NULL)
3632 {
3633#ifdef SUPPORT_PCRE2GREP_JIT
3634 if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3635#endif
3636 return TRUE;
3637 }
3638
3639/* Handle compile errors */
3640
3641if (erroffset > patlen) erroffset = patlen;
3642pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3643
3644if (fromfile)
3645 {
3646 fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3647 "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3648 }
3649else
3650 {
3651 if (count == 0)
3652 fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3653 fromtext, (int)erroffset, errmessbuffer);
3654 else
3655 fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3656 ordin(count), fromtext, (int)erroffset, errmessbuffer);
3657 }
3658
3659return FALSE;
3660}
3661
3662
3663
3664/*************************************************
3665* Read and compile a file of patterns *
3666*************************************************/
3667
3668/* This is used for --filelist, --include-from, and --exclude-from.
3669
3670Arguments:
3671 name the name of the file; "-" is stdin
3672 patptr pointer to the pattern chain anchor
3673 patlastptr pointer to the last pattern pointer
3674
3675Returns: TRUE if all went well
3676*/
3677
3678static BOOL
3679read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3680{
3681int linenumber = 0;
3682PCRE2_SIZE patlen;
3683FILE *f;
3684const char *filename;
3685char buffer[MAXPATLEN+20];
3686
3687if (strcmp(name, "-") == 0)
3688 {
3689 f = stdin;
3690 filename = stdin_name;
3691 }
3692else
3693 {
3694 f = fopen(name, "r");
3695 if (f == NULL)
3696 {
3697 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3698 return FALSE;
3699 }
3700 filename = name;
3701 }
3702
3703while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3704 {
3705 while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3706 linenumber++;
3707 if (patlen == 0) continue; /* Skip blank lines */
3708
3709 /* Note: this call to add_pattern() puts a pointer to the local variable
3710 "buffer" into the pattern chain. However, that pointer is used only when
3711 compiling the pattern, which happens immediately below, so we flatten it
3712 afterwards, as a precaution against any later code trying to use it. */
3713
3714 *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3715 if (*patlastptr == NULL)
3716 {
3717 if (f != stdin) fclose(f);
3718 return FALSE;
3719 }
3720 if (*patptr == NULL) *patptr = *patlastptr;
3721
3722 /* This loop is needed because compiling a "pattern" when -F is set may add
3723 on additional literal patterns if the original contains a newline. In the
3724 common case, it never will, because read_one_line() stops at a newline.
3725 However, the -N option can be used to give pcre2grep a different newline
3726 setting. */
3727
3728 for(;;)
3729 {
3730 if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3731 linenumber))
3732 {
3733 if (f != stdin) fclose(f);
3734 return FALSE;
3735 }
3736 (*patlastptr)->string = NULL; /* Insurance */
3737 if ((*patlastptr)->next == NULL) break;
3738 *patlastptr = (*patlastptr)->next;
3739 }
3740 }
3741
3742if (f != stdin) fclose(f);
3743return TRUE;
3744}
3745
3746
3747
3748/*************************************************
3749* Main program *
3750*************************************************/
3751
3752/* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3753
3754int
3755main(int argc, char **argv)
3756{
3757int i, j;
3758int rc = 1;
3759BOOL only_one_at_top;
3760patstr *cp;
3761fnstr *fn;
3762omstr *om;
3763const char *locale_from = "--locale";
3764
3765#ifdef SUPPORT_PCRE2GREP_JIT
3766pcre2_jit_stack *jit_stack = NULL;
3767#endif
3768
3769/* In Windows, stdout is set up as a text stream, which means that \n is
3770converted to \r\n. This causes output lines that are copied from the input to
3771change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3772that stdout is a binary stream. Note that this means all other output to stdout
3773must use STDOUT_NL to terminate lines. */
3774
3775#ifdef WIN32
3776_setmode(_fileno(stdout), _O_BINARY);
3777#endif
3778
3779/* Process the options */
3780
3781for (i = 1; i < argc; i++)
3782 {
3783 option_item *op = NULL;
3784 char *option_data = (char *)""; /* default to keep compiler happy */
3785 BOOL longop;
3786 BOOL longopwasequals = FALSE;
3787
3788 if (argv[i][0] != '-') break;
3789
3790 /* If we hit an argument that is just "-", it may be a reference to STDIN,
3791 but only if we have previously had -e or -f to define the patterns. */
3792
3793 if (argv[i][1] == 0)
3794 {
3795 if (pattern_files != NULL || patterns != NULL) break;
3796 else pcre2grep_exit(usage(2));
3797 }
3798
3799 /* Handle a long name option, or -- to terminate the options */
3800
3801 if (argv[i][1] == '-')
3802 {
3803 char *arg = argv[i] + 2;
3804 char *argequals = strchr(arg, '=');
3805
3806 if (*arg == 0) /* -- terminates options */
3807 {
3808 i++;
3809 break; /* out of the options-handling loop */
3810 }
3811
3812 longop = TRUE;
3813
3814 /* Some long options have data that follows after =, for example file=name.
3815 Some options have variations in the long name spelling: specifically, we
3816 allow "regexp" because GNU grep allows it, though I personally go along
3817 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3818 These options are entered in the table as "regex(p)". Options can be in
3819 both these categories. */
3820
3821 for (op = optionlist; op->one_char != 0; op++)
3822 {
3823 char *opbra = strchr(op->long_name, '(');
3824 char *equals = strchr(op->long_name, '=');
3825
3826 /* Handle options with only one spelling of the name */
3827
3828 if (opbra == NULL) /* Does not contain '(' */
3829 {
3830 if (equals == NULL) /* Not thing=data case */
3831 {
3832 if (strcmp(arg, op->long_name) == 0) break;
3833 }
3834 else /* Special case xxx=data */
3835 {
3836 int oplen = (int)(equals - op->long_name);
3837 int arglen = (argequals == NULL)?
3838 (int)strlen(arg) : (int)(argequals - arg);
3839 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3840 {
3841 option_data = arg + arglen;
3842 if (*option_data == '=')
3843 {
3844 option_data++;
3845 longopwasequals = TRUE;
3846 }
3847 break;
3848 }
3849 }
3850 }
3851
3852 /* Handle options with an alternate spelling of the name */
3853
3854 else
3855 {
3856 char buff1[24];
3857 char buff2[24];
3858 int ret;
3859
3860 int baselen = (int)(opbra - op->long_name);
3861 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3862 int arglen = (argequals == NULL || equals == NULL)?
3863 (int)strlen(arg) : (int)(argequals - arg);
3864
3865 if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3866 ret < 0 || ret > (int)sizeof(buff1)) ||
3867 (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3868 fulllen - baselen - 2, opbra + 1),
3869 ret < 0 || ret > (int)sizeof(buff2)))
3870 {
3871 fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3872 op->long_name);
3873 pcre2grep_exit(2);
3874 }
3875
3876 if (strncmp(arg, buff1, arglen) == 0 ||
3877 strncmp(arg, buff2, arglen) == 0)
3878 {
3879 if (equals != NULL && argequals != NULL)
3880 {
3881 option_data = argequals;
3882 if (*option_data == '=')
3883 {
3884 option_data++;
3885 longopwasequals = TRUE;
3886 }
3887 }
3888 break;
3889 }
3890 }
3891 }
3892
3893 if (op->one_char == 0)
3894 {
3895 fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3896 pcre2grep_exit(usage(2));
3897 }
3898 }
3899
Elliott Hughes5b808042021-10-01 10:56:10 -07003900 /* One-char options; many that have no data may be in a single argument; we
3901 continue till we hit the last one or one that needs data. */
3902
3903 else
3904 {
3905 char *s = argv[i] + 1;
3906 longop = FALSE;
3907
3908 while (*s != 0)
3909 {
3910 for (op = optionlist; op->one_char != 0; op++)
3911 {
3912 if (*s == op->one_char) break;
3913 }
3914 if (op->one_char == 0)
3915 {
3916 fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
3917 *s, argv[i]);
3918 pcre2grep_exit(usage(2));
3919 }
3920
3921 option_data = s+1;
3922
3923 /* Break out if this is the last character in the string; it's handled
3924 below like a single multi-char option. */
3925
3926 if (*option_data == 0) break;
3927
3928 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
3929 are used for ones that either have a numerical number or defaults, i.e.
3930 the data is optional. If a digit follows, there is data; if not, carry on
3931 with other single-character options in the same string. */
3932
3933 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
3934 {
3935 if (isdigit((unsigned char)s[1])) break;
3936 }
3937 else /* Check for an option with data */
3938 {
3939 if (op->type != OP_NODATA) break;
3940 }
3941
3942 /* Handle a single-character option with no data, then loop for the
3943 next character in the string. */
3944
3945 pcre2_options = handle_option(*s++, pcre2_options);
3946 }
3947 }
3948
3949 /* At this point we should have op pointing to a matched option. If the type
3950 is NO_DATA, it means that there is no data, and the option might set
3951 something in the PCRE options. */
3952
3953 if (op->type == OP_NODATA)
3954 {
3955 pcre2_options = handle_option(op->one_char, pcre2_options);
3956 continue;
3957 }
3958
3959 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
3960 either has a value or defaults to something. It cannot have data in a
3961 separate item. At the moment, the only such options are "colo(u)r",
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07003962 and "only-matching". */
Elliott Hughes5b808042021-10-01 10:56:10 -07003963
3964 if (*option_data == 0 &&
3965 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
3966 op->type == OP_OP_NUMBERS))
3967 {
3968 switch (op->one_char)
3969 {
3970 case N_COLOUR:
3971 colour_option = "auto";
3972 break;
3973
3974 case 'o':
3975 only_matching_last = add_number(0, only_matching_last);
3976 if (only_matching == NULL) only_matching = only_matching_last;
3977 break;
Elliott Hughes5b808042021-10-01 10:56:10 -07003978 }
3979 continue;
3980 }
3981
3982 /* Otherwise, find the data string for the option. */
3983
3984 if (*option_data == 0)
3985 {
3986 if (i >= argc - 1 || longopwasequals)
3987 {
3988 fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
3989 pcre2grep_exit(usage(2));
3990 }
3991 option_data = argv[++i];
3992 }
3993
3994 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
3995 added to a chain of numbers. */
3996
3997 if (op->type == OP_OP_NUMBERS)
3998 {
3999 unsigned long int n = decode_number(option_data, op, longop);
4000 omdatastr *omd = (omdatastr *)op->dataptr;
4001 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
4002 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
4003 }
4004
4005 /* If the option type is OP_PATLIST, it's the -e option, or one of the
4006 include/exclude options, which can be called multiple times to create lists
4007 of patterns. */
4008
4009 else if (op->type == OP_PATLIST)
4010 {
4011 patdatastr *pd = (patdatastr *)op->dataptr;
4012 *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
4013 *(pd->lastptr));
4014 if (*(pd->lastptr) == NULL) goto EXIT2;
4015 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
4016 }
4017
4018 /* If the option type is OP_FILELIST, it's one of the options that names a
4019 file. */
4020
4021 else if (op->type == OP_FILELIST)
4022 {
4023 fndatastr *fd = (fndatastr *)op->dataptr;
4024 fn = (fnstr *)malloc(sizeof(fnstr));
4025 if (fn == NULL)
4026 {
4027 fprintf(stderr, "pcre2grep: malloc failed\n");
4028 goto EXIT2;
4029 }
4030 fn->next = NULL;
4031 fn->name = option_data;
4032 if (*(fd->anchor) == NULL)
4033 *(fd->anchor) = fn;
4034 else
4035 (*(fd->lastptr))->next = fn;
4036 *(fd->lastptr) = fn;
4037 }
4038
4039 /* Handle OP_BINARY_FILES */
4040
4041 else if (op->type == OP_BINFILES)
4042 {
4043 if (strcmp(option_data, "binary") == 0)
4044 binary_files = BIN_BINARY;
4045 else if (strcmp(option_data, "without-match") == 0)
4046 binary_files = BIN_NOMATCH;
4047 else if (strcmp(option_data, "text") == 0)
4048 binary_files = BIN_TEXT;
4049 else
4050 {
4051 fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
4052 option_data);
4053 pcre2grep_exit(usage(2));
4054 }
4055 }
4056
4057 /* Otherwise, deal with a single string or numeric data value. */
4058
4059 else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
4060 op->type != OP_OP_NUMBER && op->type != OP_SIZE)
4061 {
4062 *((char **)op->dataptr) = option_data;
4063 }
4064 else
4065 {
4066 unsigned long int n = decode_number(option_data, op, longop);
4067 if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
4068 else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
4069 else *((int *)op->dataptr) = n;
4070 }
4071 }
4072
4073/* Options have been decoded. If -C was used, its value is used as a default
4074for -A and -B. */
4075
4076if (both_context > 0)
4077 {
4078 if (after_context == 0) after_context = both_context;
4079 if (before_context == 0) before_context = both_context;
4080 }
4081
4082/* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4083permitted. They display, each in their own way, only the data that has matched.
4084*/
4085
4086only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4087 file_offsets + line_offsets;
4088
4089if (only_matching_count > 1)
4090 {
4091 fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4092 "--file-offsets and/or --line-offsets\n");
4093 pcre2grep_exit(usage(2));
4094 }
4095
4096
4097/* Check that there is a big enough ovector for all -o settings. */
4098
4099for (om = only_matching; om != NULL; om = om->next)
4100 {
4101 int n = om->groupnum;
4102 if (n > (int)capture_max)
4103 {
4104 fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
4105 fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
4106 goto EXIT2;
4107 }
4108 }
4109
4110/* Check the text supplied to --output for errors. */
4111
4112if (output_text != NULL &&
4113 !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4114 goto EXIT2;
4115
4116/* Set up default compile and match contexts and a match data block. */
4117
4118offset_size = capture_max + 1;
4119compile_context = pcre2_compile_context_create(NULL);
4120match_context = pcre2_match_context_create(NULL);
4121match_data = pcre2_match_data_create(offset_size, NULL);
4122offsets = pcre2_get_ovector_pointer(match_data);
4123
4124/* If string (script) callouts are supported, set up the callout processing
4125function. */
4126
4127#ifdef SUPPORT_PCRE2GREP_CALLOUT
4128pcre2_set_callout(match_context, pcre2grep_callout, NULL);
4129#endif
4130
4131/* Put limits into the match data block. */
4132
4133if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4134if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4135if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4136
4137/* If a locale has not been provided as an option, see if the LC_CTYPE or
4138LC_ALL environment variable is set, and if so, use it. */
4139
4140if (locale == NULL)
4141 {
4142 locale = getenv("LC_ALL");
4143 locale_from = "LC_ALL";
4144 }
4145
4146if (locale == NULL)
4147 {
4148 locale = getenv("LC_CTYPE");
4149 locale_from = "LC_CTYPE";
4150 }
4151
4152/* If a locale is set, use it to generate the tables the PCRE needs. Passing
4153NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4154
4155if (locale != NULL)
4156 {
4157 if (setlocale(LC_CTYPE, locale) == NULL)
4158 {
4159 fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4160 locale, locale_from);
4161 goto EXIT2;
4162 }
4163 character_tables = pcre2_maketables(NULL);
4164 pcre2_set_character_tables(compile_context, character_tables);
4165 }
4166
4167/* Sort out colouring */
4168
4169if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4170 {
4171 if (strcmp(colour_option, "always") == 0)
4172#ifdef WIN32
4173 do_ansi = !is_stdout_tty(),
4174#endif
4175 do_colour = TRUE;
4176 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4177 else
4178 {
4179 fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4180 colour_option);
4181 goto EXIT2;
4182 }
4183 if (do_colour)
4184 {
4185 char *cs = getenv("PCRE2GREP_COLOUR");
4186 if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4187 if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4188 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4189 if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4190 if (cs == NULL) cs = getenv("GREP_COLOR");
4191 if (cs != NULL)
4192 {
4193 if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4194 }
4195#ifdef WIN32
4196 init_colour_output();
4197#endif
4198 }
4199 }
4200
4201/* Sort out a newline setting. */
4202
4203if (newline_arg != NULL)
4204 {
4205 for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4206 endlinetype++)
4207 {
4208 if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4209 }
4210 if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4211 pcre2_set_newline(compile_context, endlinetype);
4212 else
4213 {
4214 fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4215 newline_arg);
4216 goto EXIT2;
4217 }
4218 }
4219
4220/* Find default newline convention */
4221
4222else
4223 {
4224 (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4225 }
4226
4227/* Interpret the text values for -d and -D */
4228
4229if (dee_option != NULL)
4230 {
4231 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4232 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4233 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4234 else
4235 {
4236 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4237 goto EXIT2;
4238 }
4239 }
4240
4241if (DEE_option != NULL)
4242 {
4243 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4244 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4245 else
4246 {
4247 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4248 goto EXIT2;
4249 }
4250 }
4251
4252/* Set the extra options */
4253
4254(void)pcre2_set_compile_extra_options(compile_context, extra_options);
4255
4256/* Check the values for Jeffrey Friedl's debugging options. */
4257
Elliott Hughes5b808042021-10-01 10:56:10 -07004258/* If use_jit is set, check whether JIT is available. If not, do not try
4259to use JIT. */
4260
4261if (use_jit)
4262 {
4263 uint32_t answer;
4264 (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4265 if (!answer) use_jit = FALSE;
4266 }
4267
4268/* Get memory for the main buffer. */
4269
4270if (bufthird <= 0)
4271 {
4272 fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4273 goto EXIT2;
4274 }
4275
4276bufsize = 3*bufthird;
4277main_buffer = (char *)malloc(bufsize);
4278
4279if (main_buffer == NULL)
4280 {
4281 fprintf(stderr, "pcre2grep: malloc failed\n");
4282 goto EXIT2;
4283 }
4284
4285/* If no patterns were provided by -e, and there are no files provided by -f,
4286the first argument is the one and only pattern, and it must exist. */
4287
4288if (patterns == NULL && pattern_files == NULL)
4289 {
4290 if (i >= argc) return usage(2);
4291 patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4292 NULL);
4293 i++;
4294 if (patterns == NULL) goto EXIT2;
4295 }
4296
4297/* Compile the patterns that were provided on the command line, either by
4298multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4299after all the command-line options are read so that we know which PCRE options
4300to use. When -F is used, compile_pattern() may add another block into the
4301chain, so we must not access the next pointer till after the compile. */
4302
4303for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4304 {
4305 if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4306 (j == 1 && patterns->next == NULL)? 0 : j))
4307 goto EXIT2;
4308 }
4309
4310/* Read and compile the regular expressions that are provided in files. */
4311
4312for (fn = pattern_files; fn != NULL; fn = fn->next)
4313 {
4314 if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4315 }
4316
4317/* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4318
4319#ifdef SUPPORT_PCRE2GREP_JIT
4320if (use_jit)
4321 {
4322 jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4323 if (jit_stack != NULL )
4324 pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4325 }
4326#endif
4327
4328/* -F, -w, and -x do not apply to include or exclude patterns, so we must
4329adjust the options. */
4330
4331pcre2_options &= ~PCRE2_LITERAL;
4332(void)pcre2_set_compile_extra_options(compile_context, 0);
4333
4334/* If there are include or exclude patterns read from the command line, compile
4335them. */
4336
4337for (j = 0; j < 4; j++)
4338 {
4339 int k;
4340 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4341 {
4342 if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4343 (k == 1 && cp->next == NULL)? 0 : k))
4344 goto EXIT2;
4345 }
4346 }
4347
4348/* Read and compile include/exclude patterns from files. */
4349
4350for (fn = include_from; fn != NULL; fn = fn->next)
4351 {
4352 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4353 goto EXIT2;
4354 }
4355
4356for (fn = exclude_from; fn != NULL; fn = fn->next)
4357 {
4358 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4359 goto EXIT2;
4360 }
4361
4362/* If there are no files that contain lists of files to search, and there are
4363no file arguments, search stdin, and then exit. */
4364
4365if (file_lists == NULL && i >= argc)
4366 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004367 /* Seeking on a buffered stdin is not portable, so attempt to remove
4368 the buffering to work around reported issues affecting several BSDs
4369 and AIX. */
4370 if (count_limit >= 0)
4371 setbuf(stdin, NULL);
Elliott Hughes5b808042021-10-01 10:56:10 -07004372 rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4373 (filenames > FN_DEFAULT)? stdin_name : NULL);
4374 goto EXIT;
4375 }
4376
4377/* If any files that contain lists of files to search have been specified,
4378read them line by line and search the given files. */
4379
4380for (fn = file_lists; fn != NULL; fn = fn->next)
4381 {
4382 char buffer[FNBUFSIZ];
4383 FILE *fl;
4384 if (strcmp(fn->name, "-") == 0) fl = stdin; else
4385 {
4386 fl = fopen(fn->name, "rb");
4387 if (fl == NULL)
4388 {
4389 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4390 strerror(errno));
4391 goto EXIT2;
4392 }
4393 }
4394 while (fgets(buffer, sizeof(buffer), fl) != NULL)
4395 {
4396 int frc;
4397 char *end = buffer + (int)strlen(buffer);
4398 while (end > buffer && isspace(end[-1])) end--;
4399 *end = 0;
4400 if (*buffer != 0)
4401 {
4402 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4403 if (frc > 1) rc = frc;
4404 else if (frc == 0 && rc == 1) rc = 0;
4405 }
4406 }
4407 if (fl != stdin) fclose(fl);
4408 }
4409
4410/* After handling file-list, work through remaining arguments. Pass in the fact
4411that there is only one argument at top level - this suppresses the file name if
4412the argument is not a directory and filenames are not otherwise forced. */
4413
4414only_one_at_top = i == argc - 1 && file_lists == NULL;
4415
4416for (; i < argc; i++)
4417 {
4418 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4419 only_one_at_top);
4420 if (frc > 1) rc = frc;
4421 else if (frc == 0 && rc == 1) rc = 0;
4422 }
4423
4424#ifdef SUPPORT_PCRE2GREP_CALLOUT
4425/* If separating builtin echo callouts by implicit newline, add one more for
4426the final item. */
4427
4428if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
4429 fprintf(stdout, STDOUT_NL);
4430#endif
4431
4432/* Show the total number of matches if requested, but not if only one file's
4433count was printed. */
4434
4435if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4436 {
4437 if (counts_printed != 0 && filenames >= FN_DEFAULT)
4438 fprintf(stdout, "TOTAL:");
4439 fprintf(stdout, "%lu" STDOUT_NL, total_count);
4440 }
4441
4442EXIT:
4443#ifdef SUPPORT_PCRE2GREP_JIT
4444pcre2_jit_free_unused_memory(NULL);
4445if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4446#endif
4447
4448free(main_buffer);
4449if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
4450
4451pcre2_compile_context_free(compile_context);
4452pcre2_match_context_free(match_context);
4453pcre2_match_data_free(match_data);
4454
4455free_pattern_chain(patterns);
4456free_pattern_chain(include_patterns);
4457free_pattern_chain(include_dir_patterns);
4458free_pattern_chain(exclude_patterns);
4459free_pattern_chain(exclude_dir_patterns);
4460
4461free_file_chain(exclude_from);
4462free_file_chain(include_from);
4463free_file_chain(pattern_files);
4464free_file_chain(file_lists);
4465
4466while (only_matching != NULL)
4467 {
4468 omstr *this = only_matching;
4469 only_matching = this->next;
4470 free(this);
4471 }
4472
4473pcre2grep_exit(rc);
4474
4475EXIT2:
4476rc = 2;
4477goto EXIT;
4478}
4479
4480/* End of pcre2grep */