George Spelvin | b012508 | 2014-08-06 16:09:23 -0700 | [diff] [blame] | 1 | #include <linux/module.h> |
| 2 | #include <linux/glob.h> |
| 3 | |
/*
 * The only reason this code can be compiled as a module is that the
 * ATA code that depends on it can be as well.  In practice, they're
 * both usually compiled in and the module overhead goes away.
 */
| 9 | MODULE_DESCRIPTION("glob(7) matching"); |
| 10 | MODULE_LICENSE("Dual MIT/GPL"); |
| 11 | |
/**
 * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
 * @pat: Shell-style pattern to match, e.g. "*.[ch]".
 * @str: String to match.  The pattern must match the entire string.
 *
 * Perform shell-style glob matching, returning true (1) if the match
 * succeeds, or false (0) if it fails.  Equivalent to !fnmatch(@pat, @str, 0).
 *
 * Pattern metacharacters are ?, *, [ and \.
 * (And, inside character classes, !, - and ].)
 *
 * This is a small and simple implementation intended for device blacklists
 * where a string is matched against a number of patterns.  Thus, it
 * does not preprocess the patterns.  It is non-recursive, and run-time
 * is at most quadratic: strlen(@str)*strlen(@pat).
 *
 * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
 * it takes 6 passes over the pattern before matching the string.
 *
 * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
 * treat / or leading . specially; it isn't actually used for pathnames.
 *
 * Note that according to glob(7) (and unlike bash), character classes
 * are complemented by a leading !; this does not support the regex-style
 * [^a-z] syntax.
 *
 * An opening bracket without a matching close is matched literally.
 * A range whose start is greater than its end (e.g. [z-a]) matches nothing.
 *
 * Return: true if @pat matches all of @str, false otherwise.
 */
bool __pure glob_match(char const *pat, char const *str)
{
	/*
	 * Backtrack to previous * on mismatch and retry starting one
	 * character later in the string.  Because * matches all characters
	 * (no exception for /), it can be easily proved that there's
	 * never a need to backtrack multiple levels.
	 *
	 * back_str is only read once a '*' has set it together with
	 * back_pat, but it is still given a defined starting value rather
	 * than being initialised from itself.
	 */
	char const *back_pat = NULL, *back_str = NULL;

	/*
	 * Loop over each token (character or class) in pat, matching
	 * it against the remaining unmatched tail of str.  Return false
	 * on mismatch, or true after matching the trailing nul bytes.
	 */
	for (;;) {
		unsigned char c = *str++;
		unsigned char d = *pat++;

		switch (d) {
		case '?':	/* Wildcard: anything but nul */
			if (c == '\0')
				return false;
			break;
		case '*':	/* Any-length wildcard */
			if (*pat == '\0')	/* Optimize trailing * case */
				return true;
			/* Remember where to resume if a later token fails. */
			back_pat = pat;
			back_str = --str;	/* Allow zero-length match */
			break;
		case '[': {	/* Character class */
			bool match = false, inverted = (*pat == '!');
			char const *class = pat + inverted;
			unsigned char a = *class++;

			/*
			 * Iterate over each span in the character class.
			 * A span is either a single character a, or a
			 * range a-b.  The first span may begin with ']'.
			 */
			do {
				unsigned char b = a;

				if (a == '\0')	/* Malformed */
					goto literal;

				/* "a-b" range, unless the '-' is the last char */
				if (class[0] == '-' && class[1] != ']') {
					b = class[1];

					if (b == '\0')
						goto literal;

					class += 2;
					/* A reversed range (a > b) matches nothing. */
				}
				match |= (a <= c && c <= b);
			} while ((a = *class++) != ']');

			if (match == inverted)
				goto backtrack;
			/* Class consumed: continue after the closing ']'. */
			pat = class;
			}
			break;
		case '\\':	/* Escape: next pattern byte is literal */
			d = *pat++;
			/*FALLTHROUGH*/
		default:	/* Literal character */
literal:
			if (c == d) {
				if (d == '\0')
					return true;
				break;
			}
backtrack:
			if (c == '\0' || !back_pat)
				return false;	/* No point continuing */
			/* Try again from last *, one character later in str. */
			pat = back_pat;
			str = ++back_str;
			break;
		}
	}
}
| 123 | EXPORT_SYMBOL(glob_match); |
George Spelvin | 5f9be82 | 2014-08-06 16:09:25 -0700 | [diff] [blame] | 124 | |
| 125 | |
| 126 | #ifdef CONFIG_GLOB_SELFTEST |
| 127 | |
| 128 | #include <linux/printk.h> |
| 129 | #include <linux/moduleparam.h> |
| 130 | |
| 131 | /* Boot with "glob.verbose=1" to show successful tests, too */ |
| 132 | static bool verbose = false; |
| 133 | module_param(verbose, bool, 0); |
| 134 | |
/*
 * One self-test case in structured form.
 * NOTE(review): nothing visible in this file references this type; the
 * test table below is stored as a packed string instead.
 */
struct glob_test {
	char const *pat;	/* Pattern handed to glob_match() */
	char const *str;	/* String to match against the pattern */
	bool expected;		/* Result glob_match() should return */
};
| 139 | |
| 140 | static bool __pure __init test(char const *pat, char const *str, bool expected) |
| 141 | { |
| 142 | bool match = glob_match(pat, str); |
| 143 | bool success = match == expected; |
| 144 | |
| 145 | /* Can't get string literals into a particular section, so... */ |
| 146 | static char const msg_error[] __initconst = |
| 147 | KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n"; |
| 148 | static char const msg_ok[] __initconst = |
| 149 | KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n"; |
| 150 | static char const mismatch[] __initconst = "mismatch"; |
| 151 | char const *message; |
| 152 | |
| 153 | if (!success) |
| 154 | message = msg_error; |
| 155 | else if (verbose) |
| 156 | message = msg_ok; |
| 157 | else |
| 158 | return success; |
| 159 | |
| 160 | printk(message, pat, str, mismatch + 3*match); |
| 161 | return success; |
| 162 | } |
| 163 | |
/*
 * Self-test table.
 *
 * All test cases are packed into a single char array, which makes it
 * simple to place the whole table in the .init.rodata section.  The
 * obvious "array of structures containing char *" has no way to force
 * the pointed-to strings into a particular section.
 *
 * Each case is laid out as:
 *   1. Expected glob_match() result: the character '1' or '0'.
 *   2. Pattern to match: nul-terminated string.
 *   3. String to match against: nul-terminated string.
 *
 * The table ends where a '\0' appears in place of a result character;
 * that byte is the string literal's own terminator.
 *
 * GLOB_TEST() emits one such case; @expect must be the literal "1" or "0".
 */
#define GLOB_TEST(expect, pat, str)	expect pat "\0" str "\0"

static char const glob_tests[] __initconst =
	/* Some basic tests */
	GLOB_TEST("1", "a", "a")
	GLOB_TEST("0", "a", "b")
	GLOB_TEST("0", "a", "aa")
	GLOB_TEST("0", "a", "")
	GLOB_TEST("1", "", "")
	GLOB_TEST("0", "", "a")
	/* Simple character class tests */
	GLOB_TEST("1", "[a]", "a")
	GLOB_TEST("0", "[a]", "b")
	GLOB_TEST("0", "[!a]", "a")
	GLOB_TEST("1", "[!a]", "b")
	GLOB_TEST("1", "[ab]", "a")
	GLOB_TEST("1", "[ab]", "b")
	GLOB_TEST("0", "[ab]", "c")
	GLOB_TEST("1", "[!ab]", "c")
	GLOB_TEST("1", "[a-c]", "b")
	GLOB_TEST("0", "[a-c]", "d")
	/* Corner cases in character class parsing */
	GLOB_TEST("1", "[a-c-e-g]", "-")
	GLOB_TEST("0", "[a-c-e-g]", "d")
	GLOB_TEST("1", "[a-c-e-g]", "f")
	GLOB_TEST("1", "[]a-ceg-ik[]", "a")
	GLOB_TEST("1", "[]a-ceg-ik[]", "]")
	GLOB_TEST("1", "[]a-ceg-ik[]", "[")
	GLOB_TEST("1", "[]a-ceg-ik[]", "h")
	GLOB_TEST("0", "[]a-ceg-ik[]", "f")
	GLOB_TEST("0", "[!]a-ceg-ik[]", "h")
	GLOB_TEST("0", "[!]a-ceg-ik[]", "]")
	GLOB_TEST("1", "[!]a-ceg-ik[]", "f")
	/* Simple wild cards */
	GLOB_TEST("1", "?", "a")
	GLOB_TEST("0", "?", "aa")
	GLOB_TEST("0", "??", "a")
	GLOB_TEST("1", "?x?", "axb")
	GLOB_TEST("0", "?x?", "abx")
	GLOB_TEST("0", "?x?", "xab")
	/* Asterisk wild cards (backtracking) */
	GLOB_TEST("0", "*??", "a")
	GLOB_TEST("1", "*??", "ab")
	GLOB_TEST("1", "*??", "abc")
	GLOB_TEST("1", "*??", "abcd")
	GLOB_TEST("0", "??*", "a")
	GLOB_TEST("1", "??*", "ab")
	GLOB_TEST("1", "??*", "abc")
	GLOB_TEST("1", "??*", "abcd")
	GLOB_TEST("0", "?*?", "a")
	GLOB_TEST("1", "?*?", "ab")
	GLOB_TEST("1", "?*?", "abc")
	GLOB_TEST("1", "?*?", "abcd")
	GLOB_TEST("1", "*b", "b")
	GLOB_TEST("1", "*b", "ab")
	GLOB_TEST("0", "*b", "ba")
	GLOB_TEST("1", "*b", "bb")
	GLOB_TEST("1", "*b", "abb")
	GLOB_TEST("1", "*b", "bab")
	GLOB_TEST("1", "*bc", "abbc")
	GLOB_TEST("1", "*bc", "bc")
	GLOB_TEST("1", "*bc", "bbc")
	GLOB_TEST("1", "*bc", "bcbc")
	/* Multiple asterisks (complex backtracking) */
	GLOB_TEST("1", "*ac*", "abacadaeafag")
	GLOB_TEST("1", "*ac*ae*ag*", "abacadaeafag")
	GLOB_TEST("1", "*a*b*[bc]*[ef]*g*", "abacadaeafag")
	GLOB_TEST("0", "*a*b*[ef]*[cd]*g*", "abacadaeafag")
	GLOB_TEST("1", "*abcd*", "abcabcabcabcdefg")
	GLOB_TEST("1", "*ab*cd*", "abcabcabcabcdefg")
	GLOB_TEST("1", "*abcd*abcdef*", "abcabcdabcdeabcdefg")
	GLOB_TEST("0", "*abcd*", "abcabcabcabcefg")
	GLOB_TEST("0", "*ab*cd*", "abcabcabcabcefg");

#undef GLOB_TEST
| 249 | |
| 250 | static int __init glob_init(void) |
| 251 | { |
| 252 | unsigned successes = 0; |
| 253 | unsigned n = 0; |
| 254 | char const *p = glob_tests; |
| 255 | static char const message[] __initconst = |
| 256 | KERN_INFO "glob: %u self-tests passed, %u failed\n"; |
| 257 | |
| 258 | /* |
| 259 | * Tests are jammed together in a string. The first byte is '1' |
| 260 | * or '0' to indicate the expected outcome, or '\0' to indicate the |
| 261 | * end of the tests. Then come two null-terminated strings: the |
| 262 | * pattern and the string to match it against. |
| 263 | */ |
| 264 | while (*p) { |
| 265 | bool expected = *p++ & 1; |
| 266 | char const *pat = p; |
| 267 | |
| 268 | p += strlen(p) + 1; |
| 269 | successes += test(pat, p, expected); |
| 270 | p += strlen(p) + 1; |
| 271 | n++; |
| 272 | } |
| 273 | |
| 274 | n -= successes; |
| 275 | printk(message, successes, n); |
| 276 | |
| 277 | /* What's the errno for "kernel bug detected"? Guess... */ |
| 278 | return n ? -ECANCELED : 0; |
| 279 | } |
| 280 | |
| 281 | /* We need a dummy exit function to allow unload */ |
| 282 | static void __exit glob_fini(void) { } |
| 283 | |
| 284 | module_init(glob_init); |
| 285 | module_exit(glob_fini); |
| 286 | |
| 287 | #endif /* CONFIG_GLOB_SELFTEST */ |