Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 1 | /************************************************* |
| 2 | * Perl-Compatible Regular Expressions * |
| 3 | *************************************************/ |
| 4 | |
| 5 | /* PCRE is a library of functions to support regular expressions whose syntax |
| 6 | and semantics are as close as possible to those of the Perl 5 language. |
| 7 | |
| 8 | Written by Philip Hazel |
| 9 | Original API code Copyright (c) 1997-2012 University of Cambridge |
| 10 | New API code Copyright (c) 2016-2020 University of Cambridge |
| 11 | |
| 12 | ----------------------------------------------------------------------------- |
| 13 | Redistribution and use in source and binary forms, with or without |
| 14 | modification, are permitted provided that the following conditions are met: |
| 15 | |
| 16 | * Redistributions of source code must retain the above copyright notice, |
| 17 | this list of conditions and the following disclaimer. |
| 18 | |
| 19 | * Redistributions in binary form must reproduce the above copyright |
| 20 | notice, this list of conditions and the following disclaimer in the |
| 21 | documentation and/or other materials provided with the distribution. |
| 22 | |
| 23 | * Neither the name of the University of Cambridge nor the names of its |
| 24 | contributors may be used to endorse or promote products derived from |
| 25 | this software without specific prior written permission. |
| 26 | |
| 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 37 | POSSIBILITY OF SUCH DAMAGE. |
| 38 | ----------------------------------------------------------------------------- |
| 39 | */ |
| 40 | |
| 41 | |
| 42 | /* This is a freestanding support program to generate a file containing |
| 43 | character tables for PCRE2. The tables are built using the pcre2_maketables() |
| 44 | function, which is part of the PCRE2 API. By default, the system's "C" locale |
| 45 | is used rather than what the building user happens to have set, but the -L |
| 46 | option can be used to select the current locale from the LC_ALL environment |
| 47 | variable. By default, the tables are written in source form, but if -b is |
| 48 | given, they are written in binary. */ |
| 49 | |
| 50 | #ifdef HAVE_CONFIG_H |
| 51 | #include "config.h" |
| 52 | #endif |
| 53 | |
| 54 | #include <ctype.h> |
| 55 | #include <stdio.h> |
| 56 | #include <string.h> |
| 57 | #include <locale.h> |
| 58 | |
| 59 | #define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */ |
| 60 | #include "pcre2_internal.h" |
| 61 | |
| 62 | #define PCRE2_DFTABLES /* pcre2_maketables.c notices this */ |
| 63 | #include "pcre2_maketables.c" |
| 64 | |
| 65 | |
/* Labels for the character-class bit maps. main() consumes them strictly in
this order, one per map, when annotating the generated source, so the order
here must match the order of the maps in the table. */

static const char *classlist[] =
{
  "space",
  "xdigit",
  "digit",
  "upper",
  "lower",
  "word",
  "graph",
  "print",
  "punct",
  "cntrl"
};
| 71 | |
| 72 | |
| 73 | |
| 74 | /************************************************* |
| 75 | * Usage * |
| 76 | *************************************************/ |
| 77 | |
/* Print the command-line synopsis and the list of options to stderr. The
text contains no conversion specifiers, so it is emitted verbatim. */

static void
usage(void)
{
  static const char help_text[] =
    "Usage: pcre2_dftables [options] <output file>\n"
    " -b Write output in binary (default is source code)\n"
    " -L Use locale from LC_ALL (default is \"C\" locale)\n";

  (void)fputs(help_text, stderr);
}
| 87 | |
| 88 | |
| 89 | |
| 90 | /************************************************* |
| 91 | * Entry point * |
| 92 | *************************************************/ |
| 93 | |
| 94 | int main(int argc, char **argv) |
| 95 | { |
| 96 | FILE *f; |
| 97 | int i; |
| 98 | int nclass = 0; |
| 99 | BOOL binary = FALSE; |
Elliott Hughes | 3435c42 | 2020-12-04 13:18:28 -0800 | [diff] [blame] | 100 | char *env = (char *)"C"; |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 101 | const unsigned char *tables; |
| 102 | const unsigned char *base_of_tables; |
| 103 | |
| 104 | /* Process options */ |
| 105 | |
| 106 | for (i = 1; i < argc; i++) |
| 107 | { |
Elliott Hughes | 3435c42 | 2020-12-04 13:18:28 -0800 | [diff] [blame] | 108 | char *arg = argv[i]; |
Elliott Hughes | 2dbd7d2 | 2020-06-03 14:32:37 -0700 | [diff] [blame] | 109 | if (*arg != '-') break; |
| 110 | |
| 111 | if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0) |
| 112 | { |
| 113 | usage(); |
| 114 | return 0; |
| 115 | } |
| 116 | |
| 117 | else if (strcmp(arg, "-L") == 0) |
| 118 | { |
| 119 | if (setlocale(LC_ALL, "") == NULL) |
| 120 | { |
| 121 | (void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n"); |
| 122 | return 1; |
| 123 | } |
| 124 | env = getenv("LC_ALL"); |
| 125 | } |
| 126 | |
| 127 | else if (strcmp(arg, "-b") == 0) |
| 128 | binary = TRUE; |
| 129 | |
| 130 | else |
| 131 | { |
| 132 | (void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg); |
| 133 | return 1; |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | if (i != argc - 1) |
| 138 | { |
| 139 | (void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n"); |
| 140 | return 1; |
| 141 | } |
| 142 | |
| 143 | /* Make the tables */ |
| 144 | |
| 145 | tables = maketables(); |
| 146 | base_of_tables = tables; |
| 147 | |
| 148 | f = fopen(argv[i], "wb"); |
| 149 | if (f == NULL) |
| 150 | { |
| 151 | fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]); |
| 152 | return 1; |
| 153 | } |
| 154 | |
| 155 | /* If -b was specified, we write the tables in binary. */ |
| 156 | |
| 157 | if (binary) |
| 158 | { |
| 159 | int yield = 0; |
| 160 | size_t len = fwrite(tables, 1, TABLES_LENGTH, f); |
| 161 | if (len != TABLES_LENGTH) |
| 162 | { |
| 163 | (void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d " |
| 164 | "instead of %d\n", (int)len, TABLES_LENGTH); |
| 165 | yield = 1; |
| 166 | } |
| 167 | fclose(f); |
| 168 | free((void *)base_of_tables); |
| 169 | return yield; |
| 170 | } |
| 171 | |
| 172 | /* Write the tables as source code for inclusion in the PCRE2 library. There |
| 173 | are several fprintf() calls here, because gcc in pedantic mode complains about |
| 174 | the very long string otherwise. */ |
| 175 | |
| 176 | (void)fprintf(f, |
| 177 | "/*************************************************\n" |
| 178 | "* Perl-Compatible Regular Expressions *\n" |
| 179 | "*************************************************/\n\n" |
| 180 | "/* This file was automatically written by the pcre2_dftables auxiliary\n" |
| 181 | "program. It contains character tables that are used when no external\n" |
| 182 | "tables are passed to PCRE2 by the application that calls it. The tables\n" |
| 183 | "are used only for characters whose code values are less than 256. */\n\n"); |
| 184 | |
| 185 | (void)fprintf(f, |
| 186 | "/* This set of tables was written in the %s locale. */\n\n", env); |
| 187 | |
| 188 | (void)fprintf(f, |
| 189 | "/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n" |
| 190 | "to build alternative versions of this file. This is necessary if you are\n" |
| 191 | "running in an EBCDIC environment, or if you want to default to a different\n" |
| 192 | "encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n" |
| 193 | "these tables in the \"C\" locale by default. This happens automatically if\n" |
| 194 | "PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n" |
| 195 | "pcre2_dftables manually with the -L option to build tables using the LC_ALL\n" |
| 196 | "locale. */\n\n"); |
| 197 | |
| 198 | /* Force config.h in z/OS */ |
| 199 | |
| 200 | #if defined NATIVE_ZOS |
| 201 | (void)fprintf(f, |
| 202 | "/* For z/OS, config.h is forced */\n" |
| 203 | "#ifndef HAVE_CONFIG_H\n" |
| 204 | "#define HAVE_CONFIG_H 1\n" |
| 205 | "#endif\n\n"); |
| 206 | #endif |
| 207 | |
| 208 | (void)fprintf(f, |
| 209 | "/* The following #include is present because without it gcc 4.x may remove\n" |
| 210 | "the array definition from the final binary if PCRE2 is built into a static\n" |
| 211 | "library and dead code stripping is activated. This leads to link errors.\n" |
| 212 | "Pulling in the header ensures that the array gets flagged as \"someone\n" |
| 213 | "outside this compilation unit might reference this\" and so it will always\n" |
| 214 | "be supplied to the linker. */\n\n"); |
| 215 | |
| 216 | (void)fprintf(f, |
| 217 | "#ifdef HAVE_CONFIG_H\n" |
| 218 | "#include \"config.h\"\n" |
| 219 | "#endif\n\n" |
| 220 | "#include \"pcre2_internal.h\"\n\n"); |
| 221 | |
| 222 | (void)fprintf(f, |
| 223 | "const uint8_t PRIV(default_tables)[] = {\n\n" |
| 224 | "/* This table is a lower casing table. */\n\n"); |
| 225 | |
| 226 | (void)fprintf(f, " "); |
| 227 | for (i = 0; i < 256; i++) |
| 228 | { |
| 229 | if ((i & 7) == 0 && i != 0) fprintf(f, "\n "); |
| 230 | fprintf(f, "%3d", *tables++); |
| 231 | if (i != 255) fprintf(f, ","); |
| 232 | } |
| 233 | (void)fprintf(f, ",\n\n"); |
| 234 | |
| 235 | (void)fprintf(f, "/* This table is a case flipping table. */\n\n"); |
| 236 | |
| 237 | (void)fprintf(f, " "); |
| 238 | for (i = 0; i < 256; i++) |
| 239 | { |
| 240 | if ((i & 7) == 0 && i != 0) fprintf(f, "\n "); |
| 241 | fprintf(f, "%3d", *tables++); |
| 242 | if (i != 255) fprintf(f, ","); |
| 243 | } |
| 244 | (void)fprintf(f, ",\n\n"); |
| 245 | |
| 246 | (void)fprintf(f, |
| 247 | "/* This table contains bit maps for various character classes. Each map is 32\n" |
| 248 | "bytes long and the bits run from the least significant end of each byte. The\n" |
| 249 | "classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n" |
| 250 | "graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n"); |
| 251 | |
| 252 | (void)fprintf(f, " "); |
| 253 | for (i = 0; i < cbit_length; i++) |
| 254 | { |
| 255 | if ((i & 7) == 0 && i != 0) |
| 256 | { |
| 257 | if ((i & 31) == 0) (void)fprintf(f, "\n"); |
| 258 | if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]); |
| 259 | (void)fprintf(f, "\n "); |
| 260 | } |
| 261 | (void)fprintf(f, "0x%02x", *tables++); |
| 262 | if (i != cbit_length - 1) (void)fprintf(f, ","); |
| 263 | } |
| 264 | (void)fprintf(f, ",\n\n"); |
| 265 | |
| 266 | (void)fprintf(f, |
| 267 | "/* This table identifies various classes of character by individual bits:\n" |
| 268 | " 0x%02x white space character\n" |
| 269 | " 0x%02x letter\n" |
| 270 | " 0x%02x lower case letter\n" |
| 271 | " 0x%02x decimal digit\n" |
| 272 | " 0x%02x alphanumeric or '_'\n*/\n\n", |
| 273 | ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word); |
| 274 | |
| 275 | (void)fprintf(f, " "); |
| 276 | for (i = 0; i < 256; i++) |
| 277 | { |
| 278 | if ((i & 7) == 0 && i != 0) |
| 279 | { |
| 280 | (void)fprintf(f, " /* "); |
| 281 | if (isprint(i-8)) (void)fprintf(f, " %c -", i-8); |
| 282 | else (void)fprintf(f, "%3d-", i-8); |
| 283 | if (isprint(i-1)) (void)fprintf(f, " %c ", i-1); |
| 284 | else (void)fprintf(f, "%3d", i-1); |
| 285 | (void)fprintf(f, " */\n "); |
| 286 | } |
| 287 | (void)fprintf(f, "0x%02x", *tables++); |
| 288 | if (i != 255) (void)fprintf(f, ","); |
| 289 | } |
| 290 | |
| 291 | (void)fprintf(f, "};/* "); |
| 292 | if (isprint(i-8)) (void)fprintf(f, " %c -", i-8); |
| 293 | else (void)fprintf(f, "%3d-", i-8); |
| 294 | if (isprint(i-1)) (void)fprintf(f, " %c ", i-1); |
| 295 | else (void)fprintf(f, "%3d", i-1); |
| 296 | (void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n"); |
| 297 | |
| 298 | fclose(f); |
| 299 | free((void *)base_of_tables); |
| 300 | return 0; |
| 301 | } |
| 302 | |
| 303 | /* End of pcre2_dftables.c */ |