blob: 6c5c9ef94595f5fb8924dd3a54c287e837d0a7e9 [file] [log] [blame]
/*
 * This file is part of ltrace.
 * Copyright (C) 2007, 2008, 2012 Petr Machata, Red Hat Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */
20
21#include <sys/types.h>
22#include <regex.h>
23#include <string.h>
24#include <stdlib.h>
25
/*
 * Locate the end of a character class such as "[:alpha:]".
 *
 * GLOB is the NUL-terminated pattern and LENGTH the number of
 * characters before its terminator.  FROM is the index of the '['
 * that opens the class; the ':' at FROM + 1 is not re-checked here.
 *
 * Returns the index of the ']' that closes the class, or -1 when no
 * ":]" terminator is found before the end of the pattern.  An empty
 * class name ("[::]") is rejected as well.
 */
static ssize_t
match_character_class(const char *glob, size_t length, size_t from)
{
	/* Initialized at the declaration so that a zero LENGTH falls
	 * through to the bounds check below instead of reading an
	 * indeterminate value.  FROM + 2 is the first character of the
	 * class name.  */
	size_t i = from + 2;

	/* The pre-increment means the character at FROM + 2 is never
	 * itself tested for ':', so at least one name character is
	 * consumed before the closing ':' can be recognized.  */
	if (length > 0)
		while (i < length - 1 && glob[++i] != ':')
			;

	/* I is at most LENGTH - 1 when the second operand runs, so
	 * GLOB[++I] reads no further than the terminating NUL.  */
	if (i >= length || glob[++i] != ']')
		return -1;
	return i;
}
37
/*
 * Find the ']' that terminates the bracket expression opened by the
 * '[' at index FROM of GLOB.
 *
 * GLOB must be NUL-terminated and LENGTH is the number of characters
 * before the terminator; the look-ahead GLOB[i + 1] below may read
 * the terminator itself.
 *
 * *EXCLMP is set to 1 when the expression is complemented with '!',
 * and to 0 otherwise (including when it is complemented with '^').
 * It is left untouched when '[' is the last character of the
 * pattern.
 *
 * Returns the index of the closing ']', or -1 if the expression (or
 * a character class nested in it) is not terminated.
 *
 * A backslash receives no special treatment inside brackets: the
 * first ']' that is neither the first member of the set nor part of
 * a "[:class:]" ends the expression.
 */
static ssize_t
match_brack(const char *glob, size_t length, size_t from, int *exclmp)
{
	size_t i = from + 1;

	if (i >= length)
		return -1;

	/* Complement operator.  Only '!' is reported to the caller.  */
	*exclmp = 0;
	if (glob[i] == '^' || glob[i] == '!') {
		*exclmp = glob[i++] == '!';
		if (i >= length)
			return -1;
	}

	/* On first character, both [ and ] are legal.  But when [ is
	 * followed with :, it's character class.  */
	if (glob[i] == '[' && glob[i + 1] == ':') {
		ssize_t j = match_character_class(glob, length, i);
		if (j < 0)
			return -1;
		i = j;
	}
	++i; /* skip any character, including [ or ] */

	for (; i < length; ++i) {
		char c = glob[i];

		if (c == '[' && glob[i + 1] == ':') {
			/* Jump to the ']' of ":]"; the loop increment
			 * then steps past it so it is not mistaken for
			 * the end of the bracket expression.  */
			ssize_t j = match_character_class(glob, length, i);
			if (j < 0)
				return -1;
			i = j;

		} else if (c == ']') {
			return i;
		}
	}
	return -1;
}
84
/*
 * Append STR_SIZE bytes of STR to the growable buffer *BUFP.
 *
 * *SIZEP is the number of bytes currently used and *ALLOCP the
 * number of bytes allocated; both are updated on success, and *BUFP
 * is replaced if the buffer had to be reallocated.  A STR_SIZE of 0
 * means "STR is a NUL-terminated string, append strlen(STR) bytes";
 * the terminator itself is not copied.
 *
 * Returns 0 on success.  Returns -1 if the new size cannot be
 * represented in size_t or if realloc fails; in both cases *BUFP,
 * *SIZEP and *ALLOCP are left unchanged.
 */
static int
append(char **bufp, const char *str, size_t str_size,
       size_t *sizep, size_t *allocp)
{
	const size_t size_max = (size_t)-1;

	if (str_size == 0)
		str_size = strlen(str);

	/* Refuse sizes that would wrap around; a wrapped NSIZE would
	 * pass the capacity check below and let memcpy write past the
	 * end of the buffer.  */
	if (str_size > size_max - *sizep)
		return -1;
	size_t nsize = *sizep + str_size;

	if (nsize > *allocp) {
		/* Grow to twice what is needed, unless doubling itself
		 * would overflow, in which case ask for the exact
		 * size.  */
		size_t nalloc = nsize > size_max / 2 ? nsize : nsize * 2;
		char *nbuf = realloc(*bufp, nalloc);
		if (nbuf == NULL)
			return -1;
		*allocp = nalloc;
		*bufp = nbuf;
	}

	/* Nothing to copy for an empty string; this also avoids
	 * handing memcpy a NULL destination when the buffer has not
	 * been allocated yet.  */
	if (str_size > 0)
		memcpy(*bufp + *sizep, str, str_size);
	*sizep = nsize;
	return 0;
}
105
/*
 * Translate the glob pattern GLOB into a regular expression.
 *
 * On success, returns 0 and stores a newly allocated, NUL-terminated
 * regular expression in *RETP; the caller must free() it.  On
 * failure, *RETP is left untouched, nothing remains allocated, and
 * one of the following is returned:
 *
 *   REG_ESPACE   memory could not be allocated,
 *   REG_EBRACK   a bracket expression is not terminated,
 *   REG_EESCAPE  the pattern ends with a lone backslash.
 *
 * Translation rules:
 *
 *   *        ->  .*
 *   ?        ->  .
 *   .        ->  \.
 *   [...]    ->  copied verbatim, with a leading "[!" rewritten
 *                to "[^"
 *   \\       ->  \\
 *   \*       ->  \*
 *   \?       ->  ?
 *   \<c>     ->  \<c>   for any other character <c>
 *
 * Everything else is copied through unchanged.
 *
 * NOTE(review): an escaped '?' is emitted without a backslash,
 * presumably because '?' is an ordinary character in POSIX basic
 * regular expressions; confirm that callers never compile the result
 * with REG_EXTENDED.
 */
static int
glob_to_regex(const char *glob, char **retp)
{
	size_t allocd = 0;
	size_t size = 0;
	char *buf = NULL;

	/* Every append() failure is an allocation failure, so that is
	 * the status reported unless a more specific one is set before
	 * jumping to the cleanup code.  */
	int status = REG_ESPACE;

	size_t length = strlen(glob);
	int escape = 0;
	size_t i;
	for (i = 0; i < length; ++i) {
		char c = glob[i];

		if (escape) {
			/* C is the character following a backslash.  */
			int rc;
			if (c == '\\')
				rc = append(&buf, "\\\\", 0, &size, &allocd);
			else if (c == '*')
				rc = append(&buf, "\\*", 0, &size, &allocd);
			else if (c == '?')
				rc = append(&buf, "?", 0, &size, &allocd);
			else
				rc = append(&buf, (char[]){ '\\', c }, 2,
					    &size, &allocd);
			if (rc < 0)
				goto fail;
			escape = 0;

		} else if (c == '\\') {
			escape = 1;

		} else if (c == '[') {
			int exclm;
			ssize_t j = match_brack(glob, length, i, &exclm);
			if (j < 0) {
				/* Go through the common cleanup so that
				 * text accumulated before the bracket is
				 * not leaked.  */
				status = REG_EBRACK;
				goto fail;
			}

			/* EXCLM is 1 only when the set is complemented
			 * with '!'.  In that case emit "[^" ourselves
			 * and skip the two characters "[!" of the
			 * source; otherwise the whole expression is
			 * copied as is.  */
			if (exclm
			    && append(&buf, "[^", 2,
				      &size, &allocd) < 0)
				goto fail;
			if (append(&buf, glob + i + 2*exclm,
				   j - i + 1 - 2*exclm,
				   &size, &allocd) < 0)
				goto fail;
			i = j;

		} else if (c == '*') {
			if (append(&buf, ".*", 0, &size, &allocd) < 0)
				goto fail;

		} else if (c == '?') {
			if (append(&buf, ".", 0, &size, &allocd) < 0)
				goto fail;

		} else if (c == '.') {
			if (append(&buf, "\\.", 0, &size, &allocd) < 0)
				goto fail;

		} else if (append(&buf, &c, 1, &size, &allocd) < 0) {
			goto fail;
		}
	}

	/* A backslash with nothing after it.  */
	if (escape) {
		status = REG_EESCAPE;
		goto fail;
	}

	{
		/* Terminate the string.  The size is passed explicitly
		 * because a size of 0 would make append() call strlen()
		 * and copy nothing.  */
		char c = 0;
		if (append(&buf, &c, 1, &size, &allocd) < 0)
			goto fail;
	}
	*retp = buf;
	return 0;

fail:
	free(buf);
	return status;
}
182
/*
 * Compile the glob pattern GLOB into PREG.
 *
 * The glob is first translated into a regular expression, which is
 * then handed to regcomp() together with CFLAGS.  Returns 0 on
 * success; otherwise returns the non-zero status of whichever step
 * failed (translation or regcomp).  When translation fails, PREG is
 * not touched.
 */
int
globcomp(regex_t *preg, const char *glob, int cflags)
{
	char *pattern;
	int rc = glob_to_regex(glob, &pattern);

	if (rc == 0) {
		/* The translated pattern is only needed while
		 * compiling.  */
		rc = regcomp(preg, pattern, cflags);
		free(pattern);
	}
	return rc;
}
194
195#ifdef TEST
196#include <assert.h>
197#include <stdio.h>
198
/*
 * Test helper: translate GLOB and compare the outcome with what is
 * expected.
 *
 * EXP_STATUS is the status glob_to_regex() should return.  When it
 * is 0, EXPECT is the regular expression that should be produced;
 * otherwise EXPECT is not used.  Mismatches are reported on stderr.
 */
static void
translate(const char *glob, int exp_status, const char *expect)
{
	char *pattern = NULL;
	int status = glob_to_regex(glob, &pattern);
	if (status != exp_status) {
		fprintf(stderr, "translating %s, expected status %d, got %d\n",
			glob, exp_status, status);
		/* If the translation unexpectedly succeeded, PATTERN
		 * holds an allocated string; release it.  It is still
		 * NULL otherwise, and free(NULL) does nothing.  */
		free(pattern);
		return;
	}

	if (status == 0) {
		assert(pattern != NULL);
		if (strcmp(pattern, expect) != 0)
			fprintf(stderr, "translating %s, expected %s, got %s\n",
				glob, expect, pattern);
		free(pattern);
	} else {
		/* A failed translation must not store anything.  */
		assert(pattern == NULL);
	}
}
220
/*
 * Test helper: compile GLOB with globcomp() (no flags), run the
 * result against STR, and assert that regexec() returns EXPECT
 * (0 for a match, REG_NOMATCH otherwise).  Compilation itself is
 * asserted to succeed.
 */
static void
try_match(const char *glob, const char *str, int expect)
{
	regex_t re;

	int comp_rc = globcomp(&re, glob, 0);
	assert(comp_rc == 0);

	int exec_rc = regexec(&re, str, 0, NULL, 0);
	assert(exec_rc == expect);

	regfree(&re);
}
231
/*
 * Test driver.  Runs every translation case through translate() and
 * then every matching case through try_match(), in table order.
 */
int
main(void)
{
	/* Glob, expected glob_to_regex() status, and expected regular
	 * expression (NULL when the translation is expected to fail).  */
	static const struct {
		const char *glob;
		int status;
		const char *regex;
	} translations[] = {
		{ "*", 0, ".*" },
		{ "?", 0, "." },
		{ ".*", 0, "\\..*" },
		{ "*.*", 0, ".*\\..*" },
		{ "*a*", 0, ".*a.*" },
		{ "[abc]", 0, "[abc]" },
		{ "[^abc]", 0, "[^abc]" },
		{ "[!abc]", 0, "[^abc]" },
		{ "[]]", 0, "[]]" },
		{ "[[]", 0, "[[]" },
		{ "[^]]", 0, "[^]]" },
		{ "[^a-z]", 0, "[^a-z]" },
		{ "[abc\\]]", 0, "[abc\\]]" },
		{ "[abc\\]def]", 0, "[abc\\]def]" },
		{ "[[:space:]]", 0, "[[:space:]]" },
		{ "[^[:space:]]", 0, "[^[:space:]]" },
		{ "[![:space:]]", 0, "[^[:space:]]" },
		{ "[^a-z]*", 0, "[^a-z].*" },
		{ "[^a-z]bar*", 0, "[^a-z]bar.*" },
		{ "*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.", 0,
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\."
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\." },

		/* Malformed patterns.  */
		{ "\\", REG_EESCAPE, NULL },
		{ "[^[:naotuh\\", REG_EBRACK, NULL },
		{ "[^[:", REG_EBRACK, NULL },
		{ "[^[", REG_EBRACK, NULL },
		{ "[^", REG_EBRACK, NULL },
		{ "[\\", REG_EBRACK, NULL },
		{ "[", REG_EBRACK, NULL },
	};

	/* Glob, subject string, and expected regexec() result.  */
	static const struct {
		const char *glob;
		const char *str;
		int expect;
	} matches[] = {
		{ "abc*def", "abc012def", 0 },
		{ "abc*def", "ab012def", REG_NOMATCH },
		{ "[abc]*def", "a1def", 0 },
		{ "[abc]*def", "b1def", 0 },
		{ "[abc]*def", "d1def", REG_NOMATCH },
	};

	size_t k;

	for (k = 0; k < sizeof translations / sizeof translations[0]; ++k)
		translate(translations[k].glob, translations[k].status,
			  translations[k].regex);

	for (k = 0; k < sizeof matches / sizeof matches[0]; ++k)
		try_match(matches[k].glob, matches[k].str,
			  matches[k].expect);

	return 0;
}
274
275#endif