| 9487f7f | 2011-08-03 07:05:30 -0700 | [diff] [blame] | 1 | /*************************************************************************** |
| 2 | * _ _ ____ _ |
| 3 | * Project ___| | | | _ \| | |
| 4 | * / __| | | | |_) | | |
| 5 | * | (__| |_| | _ <| |___ |
| 6 | * \___|\___/|_| \_\_____| |
| 7 | * |
| 8 | * Copyright (C) 1998 - 2010, Daniel Stenberg, <daniel@haxx.se>, et al. |
| 9 | * |
| 10 | * This software is licensed as described in the file COPYING, which |
| 11 | * you should have received as part of this distribution. The terms |
| 12 | * are also available at http://curl.haxx.se/docs/copyright.html. |
| 13 | * |
| 14 | * You may opt to use, copy, modify, merge, publish, distribute and/or sell |
| 15 | * copies of the Software, and permit persons to whom the Software is |
| 16 | * furnished to do so, under the terms of the COPYING file. |
| 17 | * |
| 18 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY |
| 19 | * KIND, either express or implied. |
| 20 | * |
| 21 | ***************************************************************************/ |
| 22 | |
| 23 | #include "setup.h" |
| 24 | |
| 25 | #include "curl_fnmatch.h" |
| 26 | |
| 27 | #define _MPRINTF_REPLACE /* use our functions only */ |
| 28 | #include <curl/mprintf.h> |
| 29 | |
| 30 | #include "curl_memory.h" |
| 31 | /* The last #include file should be: */ |
| 32 | #include "memdebug.h" |
| 33 | |
/*
 * Layout of the "charset" table that describes one bracket expression.
 *
 * Slots 0 .. CURLFNM_CHARSET_LEN-1 are indexed by byte value and are set to 1
 * for every literal character the expression accepts. The slots that follow
 * are flags: one for negation and one per named character class. The table
 * is sized with a few spare slots beyond the highest flag in use.
 */
#define CURLFNM_CHARSET_LEN (sizeof(char) * 256)
#define CURLFNM_CHSET_SIZE  (CURLFNM_CHARSET_LEN + 15)

/* set when the expression starts with '^' or '!' */
#define CURLFNM_NEGATE      CURLFNM_CHARSET_LEN

/* one flag per [:keyword:] character class */
#define CURLFNM_ALNUM       (CURLFNM_CHARSET_LEN + 1)
#define CURLFNM_DIGIT       (CURLFNM_CHARSET_LEN + 2)
#define CURLFNM_XDIGIT      (CURLFNM_CHARSET_LEN + 3)
#define CURLFNM_ALPHA       (CURLFNM_CHARSET_LEN + 4)
#define CURLFNM_PRINT       (CURLFNM_CHARSET_LEN + 5)
#define CURLFNM_BLANK       (CURLFNM_CHARSET_LEN + 6)
#define CURLFNM_LOWER       (CURLFNM_CHARSET_LEN + 7)
#define CURLFNM_GRAPH       (CURLFNM_CHARSET_LEN + 8)
#define CURLFNM_SPACE       (CURLFNM_CHARSET_LEN + 9)
#define CURLFNM_UPPER       (CURLFNM_CHARSET_LEN + 10)
| 49 | |
/* States of the top-level pattern matcher. */
typedef enum {
  CURLFNM_LOOP_DEFAULT = 0,   /* ordinary pattern character */
  CURLFNM_LOOP_BACKSLASH = 1  /* previous pattern character was a backslash */
} loop_state;
| 54 | |
/* States of the bracket expression ("[...]") parser. */
typedef enum {
  CURLFNM_SCHS_DEFAULT = 0,       /* expecting any member of the set */
  CURLFNM_SCHS_MAYRANGE = 1,      /* after a literal: a '-' may follow */
  CURLFNM_SCHS_MAYRANGE2 = 2,     /* after "x-": expecting the range end */
  CURLFNM_SCHS_RIGHTBR = 3,       /* the set started with a literal ']' */
  CURLFNM_SCHS_RIGHTBRLEFTBR = 4  /* the set started with "][" */
} setcharset_state;
| 62 | |
/* States of the "[:keyword:]" character class parser. */
typedef enum {
  CURLFNM_PKW_INIT = 0,  /* collecting the letters of the keyword */
  CURLFNM_PKW_DDOT = 1   /* the closing ':' was seen, a ']' must follow */
} parsekey_state;
| 67 | |
/* Result codes of the bracket expression parsers; FAIL must stay zero since
   callers test the result as a boolean. */
#define SETCHARSET_FAIL 0
#define SETCHARSET_OK   1
| 70 | |
| 71 | static int parsekeyword(unsigned char **pattern, unsigned char *charset) |
| 72 | { |
| 73 | parsekey_state state = CURLFNM_PKW_INIT; |
| 74 | #define KEYLEN 10 |
| 75 | char keyword[KEYLEN] = { 0 }; |
| 76 | int found = FALSE; |
| 77 | int i; |
| 78 | unsigned char *p = *pattern; |
| 79 | for(i = 0; !found; i++) { |
| 80 | char c = *p++; |
| 81 | if(i >= KEYLEN) |
| 82 | return SETCHARSET_FAIL; |
| 83 | switch(state) { |
| 84 | case CURLFNM_PKW_INIT: |
| 85 | if(ISALPHA(c) && ISLOWER(c)) |
| 86 | keyword[i] = c; |
| 87 | else if(c == ':') |
| 88 | state = CURLFNM_PKW_DDOT; |
| 89 | else |
| 90 | return 0; |
| 91 | break; |
| 92 | case CURLFNM_PKW_DDOT: |
| 93 | if(c == ']') |
| 94 | found = TRUE; |
| 95 | else |
| 96 | return SETCHARSET_FAIL; |
| 97 | } |
| 98 | } |
| 99 | #undef KEYLEN |
| 100 | |
| 101 | *pattern = p; /* move caller's pattern pointer */ |
| 102 | if(strcmp(keyword, "digit") == 0) |
| 103 | charset[CURLFNM_DIGIT] = 1; |
| 104 | else if(strcmp(keyword, "alnum") == 0) |
| 105 | charset[CURLFNM_ALNUM] = 1; |
| 106 | else if(strcmp(keyword, "alpha") == 0) |
| 107 | charset[CURLFNM_ALPHA] = 1; |
| 108 | else if(strcmp(keyword, "xdigit") == 0) |
| 109 | charset[CURLFNM_XDIGIT] = 1; |
| 110 | else if(strcmp(keyword, "print") == 0) |
| 111 | charset[CURLFNM_PRINT] = 1; |
| 112 | else if(strcmp(keyword, "graph") == 0) |
| 113 | charset[CURLFNM_GRAPH] = 1; |
| 114 | else if(strcmp(keyword, "space") == 0) |
| 115 | charset[CURLFNM_SPACE] = 1; |
| 116 | else if(strcmp(keyword, "blank") == 0) |
| 117 | charset[CURLFNM_BLANK] = 1; |
| 118 | else if(strcmp(keyword, "upper") == 0) |
| 119 | charset[CURLFNM_UPPER] = 1; |
| 120 | else if(strcmp(keyword, "lower") == 0) |
| 121 | charset[CURLFNM_LOWER] = 1; |
| 122 | else |
| 123 | return SETCHARSET_FAIL; |
| 124 | return SETCHARSET_OK; |
| 125 | } |
| 126 | |
| 127 | /* returns 1 (true) if pattern is OK, 0 if is bad ("p" is pattern pointer) */ |
| 128 | static int setcharset(unsigned char **p, unsigned char *charset) |
| 129 | { |
| 130 | setcharset_state state = CURLFNM_SCHS_DEFAULT; |
| 131 | unsigned char rangestart = 0; |
| 132 | unsigned char lastchar = 0; |
| 133 | bool something_found = FALSE; |
| 134 | unsigned char c; |
| 135 | for(;;) { |
| 136 | c = **p; |
| 137 | switch(state) { |
| 138 | case CURLFNM_SCHS_DEFAULT: |
| 139 | if(ISALNUM(c)) { /* ASCII value */ |
| 140 | rangestart = c; |
| 141 | charset[c] = 1; |
| 142 | (*p)++; |
| 143 | state = CURLFNM_SCHS_MAYRANGE; |
| 144 | something_found = TRUE; |
| 145 | } |
| 146 | else if(c == ']') { |
| 147 | if(something_found) |
| 148 | return SETCHARSET_OK; |
| 149 | else |
| 150 | something_found = TRUE; |
| 151 | state = CURLFNM_SCHS_RIGHTBR; |
| 152 | charset[c] = 1; |
| 153 | (*p)++; |
| 154 | } |
| 155 | else if(c == '[') { |
| 156 | char c2 = *((*p)+1); |
| 157 | if(c2 == ':') { /* there has to be a keyword */ |
| 158 | (*p) += 2; |
| 159 | if(parsekeyword(p, charset)) { |
| 160 | state = CURLFNM_SCHS_DEFAULT; |
| 161 | } |
| 162 | else |
| 163 | return SETCHARSET_FAIL; |
| 164 | } |
| 165 | else { |
| 166 | charset[c] = 1; |
| 167 | (*p)++; |
| 168 | } |
| 169 | something_found = TRUE; |
| 170 | } |
| 171 | else if(c == '?' || c == '*') { |
| 172 | something_found = TRUE; |
| 173 | charset[c] = 1; |
| 174 | (*p)++; |
| 175 | } |
| 176 | else if(c == '^' || c == '!') { |
| 177 | if(!something_found) { |
| 178 | if(charset[CURLFNM_NEGATE]) { |
| 179 | charset[c] = 1; |
| 180 | something_found = TRUE; |
| 181 | } |
| 182 | else |
| 183 | charset[CURLFNM_NEGATE] = 1; /* negate charset */ |
| 184 | } |
| 185 | else |
| 186 | charset[c] = 1; |
| 187 | (*p)++; |
| 188 | } |
| 189 | else if(c == '\\') { |
| 190 | c = *(++(*p)); |
| 191 | if(ISPRINT((c))) { |
| 192 | something_found = TRUE; |
| 193 | state = CURLFNM_SCHS_MAYRANGE; |
| 194 | charset[c] = 1; |
| 195 | rangestart = c; |
| 196 | (*p)++; |
| 197 | } |
| 198 | else |
| 199 | return SETCHARSET_FAIL; |
| 200 | } |
| 201 | else if(c == '\0') { |
| 202 | return SETCHARSET_FAIL; |
| 203 | } |
| 204 | else { |
| 205 | charset[c] = 1; |
| 206 | (*p)++; |
| 207 | something_found = TRUE; |
| 208 | } |
| 209 | break; |
| 210 | case CURLFNM_SCHS_MAYRANGE: |
| 211 | if(c == '-') { |
| 212 | charset[c] = 1; |
| 213 | (*p)++; |
| 214 | lastchar = '-'; |
| 215 | state = CURLFNM_SCHS_MAYRANGE2; |
| 216 | } |
| 217 | else if(c == '[') { |
| 218 | state = CURLFNM_SCHS_DEFAULT; |
| 219 | } |
| 220 | else if(ISALNUM(c)) { |
| 221 | charset[c] = 1; |
| 222 | (*p)++; |
| 223 | } |
| 224 | else if(c == '\\') { |
| 225 | c = *(++(*p)); |
| 226 | if(ISPRINT(c)) { |
| 227 | charset[c] = 1; |
| 228 | (*p)++; |
| 229 | } |
| 230 | else |
| 231 | return SETCHARSET_FAIL; |
| 232 | } |
| 233 | else if(c == ']') { |
| 234 | return SETCHARSET_OK; |
| 235 | } |
| 236 | else |
| 237 | return SETCHARSET_FAIL; |
| 238 | break; |
| 239 | case CURLFNM_SCHS_MAYRANGE2: |
| 240 | if(c == '\\') { |
| 241 | c = *(++(*p)); |
| 242 | if(!ISPRINT(c)) |
| 243 | return SETCHARSET_FAIL; |
| 244 | } |
| 245 | if(c == ']') { |
| 246 | return SETCHARSET_OK; |
| 247 | } |
| 248 | else if(c == '\\') { |
| 249 | c = *(++(*p)); |
| 250 | if(ISPRINT(c)) { |
| 251 | charset[c] = 1; |
| 252 | state = CURLFNM_SCHS_DEFAULT; |
| 253 | (*p)++; |
| 254 | } |
| 255 | else |
| 256 | return SETCHARSET_FAIL; |
| 257 | } |
| 258 | if(c >= rangestart) { |
| 259 | if((ISLOWER(c) && ISLOWER(rangestart)) || |
| 260 | (ISDIGIT(c) && ISDIGIT(rangestart)) || |
| 261 | (ISUPPER(c) && ISUPPER(rangestart))) { |
| 262 | charset[lastchar] = 0; |
| 263 | rangestart++; |
| 264 | while(rangestart++ <= c) |
| 265 | charset[rangestart-1] = 1; |
| 266 | (*p)++; |
| 267 | state = CURLFNM_SCHS_DEFAULT; |
| 268 | } |
| 269 | else |
| 270 | return SETCHARSET_FAIL; |
| 271 | } |
| 272 | break; |
| 273 | case CURLFNM_SCHS_RIGHTBR: |
| 274 | if(c == '[') { |
| 275 | state = CURLFNM_SCHS_RIGHTBRLEFTBR; |
| 276 | charset[c] = 1; |
| 277 | (*p)++; |
| 278 | } |
| 279 | else if(c == ']') { |
| 280 | return SETCHARSET_OK; |
| 281 | } |
| 282 | else if(c == '\0') { |
| 283 | return SETCHARSET_FAIL; |
| 284 | } |
| 285 | else if(ISPRINT(c)) { |
| 286 | charset[c] = 1; |
| 287 | (*p)++; |
| 288 | state = CURLFNM_SCHS_DEFAULT; |
| 289 | } |
| 290 | else |
| 291 | /* used 'goto fail' instead of 'return SETCHARSET_FAIL' to avoid a |
| 292 | * nonsense warning 'statement not reached' at end of the fnc when |
| 293 | * compiling on Solaris */ |
| 294 | goto fail; |
| 295 | break; |
| 296 | case CURLFNM_SCHS_RIGHTBRLEFTBR: |
| 297 | if(c == ']') { |
| 298 | return SETCHARSET_OK; |
| 299 | } |
| 300 | else { |
| 301 | state = CURLFNM_SCHS_DEFAULT; |
| 302 | charset[c] = 1; |
| 303 | (*p)++; |
| 304 | } |
| 305 | break; |
| 306 | } |
| 307 | } |
| 308 | fail: |
| 309 | return SETCHARSET_FAIL; |
| 310 | } |
| 311 | |
| 312 | static int loop(const unsigned char *pattern, const unsigned char *string) |
| 313 | { |
| 314 | loop_state state = CURLFNM_LOOP_DEFAULT; |
| 315 | unsigned char *p = (unsigned char *)pattern; |
| 316 | unsigned char *s = (unsigned char *)string; |
| 317 | unsigned char charset[CURLFNM_CHSET_SIZE] = { 0 }; |
| 318 | int rc = 0; |
| 319 | |
| 320 | for (;;) { |
| 321 | switch(state) { |
| 322 | case CURLFNM_LOOP_DEFAULT: |
| 323 | if(*p == '*') { |
| 324 | while(*(p+1) == '*') /* eliminate multiple stars */ |
| 325 | p++; |
| 326 | if(*s == '\0' && *(p+1) == '\0') |
| 327 | return CURL_FNMATCH_MATCH; |
| 328 | rc = loop(p + 1, s); /* *.txt matches .txt <=> .txt matches .txt */ |
| 329 | if(rc == CURL_FNMATCH_MATCH) |
| 330 | return CURL_FNMATCH_MATCH; |
| 331 | if(*s) /* let the star eat up one character */ |
| 332 | s++; |
| 333 | else |
| 334 | return CURL_FNMATCH_NOMATCH; |
| 335 | } |
| 336 | else if(*p == '?') { |
| 337 | if(ISPRINT(*s)) { |
| 338 | s++; |
| 339 | p++; |
| 340 | } |
| 341 | else if(*s == '\0') |
| 342 | return CURL_FNMATCH_NOMATCH; |
| 343 | else |
| 344 | return CURL_FNMATCH_FAIL; /* cannot deal with other character */ |
| 345 | } |
| 346 | else if(*p == '\0') { |
| 347 | if(*s == '\0') |
| 348 | return CURL_FNMATCH_MATCH; |
| 349 | else |
| 350 | return CURL_FNMATCH_NOMATCH; |
| 351 | } |
| 352 | else if(*p == '\\') { |
| 353 | state = CURLFNM_LOOP_BACKSLASH; |
| 354 | p++; |
| 355 | } |
| 356 | else if(*p == '[') { |
| 357 | unsigned char *pp = p+1; /* cannot handle with pointer to register */ |
| 358 | if(setcharset(&pp, charset)) { |
| 359 | int found = FALSE; |
| 360 | if(charset[(unsigned int)*s]) |
| 361 | found = TRUE; |
| 362 | else if(charset[CURLFNM_ALNUM]) |
| 363 | found = ISALNUM(*s); |
| 364 | else if(charset[CURLFNM_ALPHA]) |
| 365 | found = ISALPHA(*s); |
| 366 | else if(charset[CURLFNM_DIGIT]) |
| 367 | found = ISDIGIT(*s); |
| 368 | else if(charset[CURLFNM_XDIGIT]) |
| 369 | found = ISXDIGIT(*s); |
| 370 | else if(charset[CURLFNM_PRINT]) |
| 371 | found = ISPRINT(*s); |
| 372 | else if(charset[CURLFNM_SPACE]) |
| 373 | found = ISSPACE(*s); |
| 374 | else if(charset[CURLFNM_UPPER]) |
| 375 | found = ISUPPER(*s); |
| 376 | else if(charset[CURLFNM_LOWER]) |
| 377 | found = ISLOWER(*s); |
| 378 | else if(charset[CURLFNM_BLANK]) |
| 379 | found = ISBLANK(*s); |
| 380 | else if(charset[CURLFNM_GRAPH]) |
| 381 | found = ISGRAPH(*s); |
| 382 | |
| 383 | if(charset[CURLFNM_NEGATE]) |
| 384 | found = !found; |
| 385 | |
| 386 | if(found) { |
| 387 | p = pp+1; |
| 388 | s++; |
| 389 | memset(charset, 0, CURLFNM_CHSET_SIZE); |
| 390 | } |
| 391 | else |
| 392 | return CURL_FNMATCH_NOMATCH; |
| 393 | } |
| 394 | else |
| 395 | return CURL_FNMATCH_FAIL; |
| 396 | } |
| 397 | else { |
| 398 | if(*p++ != *s++) |
| 399 | return CURL_FNMATCH_NOMATCH; |
| 400 | } |
| 401 | break; |
| 402 | case CURLFNM_LOOP_BACKSLASH: |
| 403 | if(ISPRINT(*p)) { |
| 404 | if(*p++ == *s++) |
| 405 | state = CURLFNM_LOOP_DEFAULT; |
| 406 | else |
| 407 | return CURL_FNMATCH_NOMATCH; |
| 408 | } |
| 409 | else |
| 410 | return CURL_FNMATCH_FAIL; |
| 411 | break; |
| 412 | } |
| 413 | } |
| 414 | } |
| 415 | |
| 416 | int Curl_fnmatch(void *ptr, const char *pattern, const char *string) |
| 417 | { |
| 418 | (void)ptr; /* the argument is specified by the curl_fnmatch_callback |
| 419 | prototype, but not used by Curl_fnmatch() */ |
| 420 | if(!pattern || !string) { |
| 421 | return CURL_FNMATCH_FAIL; |
| 422 | } |
| 423 | return loop((unsigned char *)pattern, (unsigned char *)string); |
| 424 | } |