blob: c497b3c9d569ede2f8f1dbe91a4ae63a4365accf [file] [log] [blame]
/*
 * This file is part of ltrace.
 * Copyright (C) 2007,2008,2012,2013 Petr Machata, Red Hat Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */
20
#include <sys/types.h>
#include <regex.h>
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
Petr Machata1bbfbc62012-03-31 02:00:00 +020026
/* Scan a character class such as "[:alpha:]" that appears inside a
 * bracket expression.
 *
 * GLOB is the pattern, LENGTH the number of valid bytes in it, and
 * FROM the index of the '[' that opens the class.  The callers in
 * this file only call this after seeing "[:" at FROM.
 *
 * Returns the index of the ']' that closes the class, or -1 if no
 * ":]" terminator lies within the first LENGTH bytes.  */
static ssize_t
match_character_class(const char *glob, size_t length, size_t from)
{
	size_t i = 0;

	/* LENGTH - 1 below would wrap around for an empty pattern.  */
	if (length > 0)
		/* Start behind "[:" and advance to the next ':'.
		 * Because of the pre-increment, the byte at FROM + 2
		 * is stepped over without being examined.  */
		for (i = from + 2; i < length - 1 && glob[++i] != ':'; )
			;

	/* The closing ']' has to follow immediately and still be
	 * inside the pattern.  Test the bound first so that nothing
	 * at or beyond GLOB[LENGTH] is ever read.  */
	if (i + 1 >= length || glob[i + 1] != ']')
		return -1;
	return i + 1;
}
38
/* Find the end of the bracket expression that opens at GLOB[FROM].
 *
 * GLOB is the pattern, LENGTH the number of valid bytes in it, and
 * FROM the index of the opening '['.  Unless the expression is
 * rejected for having nothing after the '[', *EXCLMP is set to 1
 * when the expression is complemented with '!' and to 0 otherwise
 * (including when it is complemented with '^').
 *
 * Returns the index of the matching ']', or -1 if the expression is
 * not terminated or contains a malformed character class.  */
static ssize_t
match_brack(const char *glob, size_t length, size_t from, int *exclmp)
{
	size_t i = from + 1;

	if (i >= length)
		return -1;

	/* Complement operator.  Only '!' is reported to the caller;
	 * '^' is skipped here but leaves *EXCLMP at 0.  */
	*exclmp = 0;
	if (glob[i] == '^' || glob[i] == '!') {
		*exclmp = glob[i++] == '!';
		if (i >= length)
			return -1;
	}

	/* On first character, both [ and ] are legal.  But when [ is
	 * followed with :, it's character class.  */
	if (glob[i] == '[' && i + 1 < length && glob[i + 1] == ':') {
		ssize_t j = match_character_class(glob, length, i);
		if (j < 0)
			return -1;
		i = j;
	}
	++i; /* skip any character, including [ or ] */

	for (; i < length; ++i) {
		char c = glob[i];
		if (c == '[' && i + 1 < length && glob[i + 1] == ':') {
			/* Jump over the whole class so that its own
			 * ']' is not taken for the end of the
			 * bracket expression.  */
			ssize_t j = match_character_class(glob, length, i);
			if (j < 0)
				return -1;
			i = j;

		} else if (c == ']') {
			return i;
		}
	}
	return -1;
}
80
/* Append bytes to a growable heap buffer.
 *
 * *BUFP is the buffer (may be NULL when *ALLOCP is 0), *SIZEP the
 * number of bytes used and *ALLOCP the number of bytes allocated.
 * STR_SIZE bytes are copied from STR; a STR_SIZE of 0 means that STR
 * is a NUL-terminated string whose length should be taken with
 * strlen (the terminator itself is not copied).
 *
 * Returns 0 on success.  Returns -1 if the buffer cannot be grown,
 * in which case *BUFP, *SIZEP and *ALLOCP are left unchanged.  */
static int
append(char **bufp, const char *str, size_t str_size,
       size_t *sizep, size_t *allocp)
{
	if (str_size == 0)
		str_size = strlen(str);

	/* Appending an empty string is a no-op.  Returning here also
	 * avoids handing a possibly NULL *BUFP to memcpy.  */
	if (str_size == 0)
		return 0;

	/* Refuse sizes that would wrap around size_t; otherwise the
	 * capacity check below would pass and memcpy would write far
	 * outside the buffer.  */
	if (str_size > SIZE_MAX - *sizep)
		return -1;
	size_t nsize = *sizep + str_size;

	if (nsize > *allocp) {
		/* Grow to twice the needed size to amortize the cost
		 * of repeated appends, unless doubling would wrap.  */
		size_t nalloc = nsize > SIZE_MAX / 2 ? nsize : nsize * 2;
		char *nbuf = realloc(*bufp, nalloc);
		if (nbuf == NULL)
			return -1;
		*allocp = nalloc;
		*bufp = nbuf;
	}

	memcpy(*bufp + *sizep, str, str_size);
	*sizep = nsize;
	return 0;
}
101
/* Translate the glob pattern GLOB into a regular expression.
 *
 * Outside of bracket expressions, '*' becomes ".*", '?' becomes ".",
 * and '.' becomes "\.".  A backslash escapes the following
 * character: "\?" is emitted as a bare "?", any other escaped
 * character is emitted with the backslash kept.  Bracket expressions
 * are copied through, except that a leading "[!" is rewritten to
 * "[^".  Every other character is copied as is.
 *
 * On success, returns 0 and stores a freshly allocated,
 * NUL-terminated string in *RETP; the caller is responsible for
 * freeing it.  On failure *RETP is left untouched and one of the
 * following is returned:
 *   REG_ESPACE   the output buffer could not be grown,
 *   REG_EBRACK   a bracket expression is not terminated,
 *   REG_EESCAPE  the pattern ends with an unpaired backslash.  */
static int
glob_to_regex(const char *glob, char **retp)
{
	size_t allocd = 0;
	size_t size = 0;
	char *buf = NULL;

	/* append() only ever fails on allocation, so that is the
	 * default reason for reaching the fail label.  */
	int status = REG_ESPACE;

	size_t length = strlen(glob);
	int escape = 0;
	size_t i;
	for (i = 0; i < length; ++i) {
		char c = glob[i];

		if (escape) {
			/* Previous character was a backslash.  */
			int rc;
			if (c == '\\')
				rc = append(&buf, "\\\\", 0, &size, &allocd);
			else if (c == '*')
				rc = append(&buf, "\\*", 0, &size, &allocd);
			else if (c == '?')
				rc = append(&buf, "?", 0, &size, &allocd);
			else
				rc = append(&buf, (char[]){ '\\', c }, 2,
					    &size, &allocd);
			if (rc < 0)
				goto fail;
			escape = 0;
			continue;
		}

		if (c == '\\') {
			escape = 1;

		} else if (c == '[') {
			int exclm;
			ssize_t j = match_brack(glob, length, i, &exclm);
			if (j < 0) {
				status = REG_EBRACK;
				goto fail;
			}

			/* For "[!...]", emit "[^" in place of the
			 * first two characters and copy the rest;
			 * otherwise copy the expression verbatim,
			 * brackets included.  */
			if (exclm
			    && append(&buf, "[^", 2, &size, &allocd) < 0)
				goto fail;
			if (append(&buf, glob + i + 2*exclm,
				   j - i + 1 - 2*exclm,
				   &size, &allocd) < 0)
				goto fail;

			/* Resume behind the closing bracket.  */
			i = j;

		} else if (c == '*') {
			if (append(&buf, ".*", 0, &size, &allocd) < 0)
				goto fail;
		} else if (c == '?') {
			if (append(&buf, ".", 0, &size, &allocd) < 0)
				goto fail;
		} else if (c == '.') {
			if (append(&buf, "\\.", 0, &size, &allocd) < 0)
				goto fail;
		} else if (append(&buf, &c, 1, &size, &allocd) < 0) {
			goto fail;
		}
	}

	/* A backslash with nothing to escape.  */
	if (escape) {
		status = REG_EESCAPE;
		goto fail;
	}

	/* Terminate the string.  The explicit size of 1 makes
	 * append() copy the NUL byte itself instead of calling
	 * strlen.  */
	if (append(&buf, "", 1, &size, &allocd) < 0)
		goto fail;

	*retp = buf;
	return 0;

fail:
	free(buf);
	return status;
}
180
/* Compile the glob pattern GLOB into PREG.
 *
 * The glob is first translated to a regular expression, which is
 * then compiled by regcomp with CFLAGS passed through unchanged.
 *
 * Returns 0 on success.  Otherwise returns the REG_* error code of
 * whichever step failed; when the translation step fails, regcomp
 * is not called and PREG is not touched.  */
int
globcomp(regex_t *preg, const char *glob, int cflags)
{
	char *regex = NULL;
	int status = glob_to_regex(glob, &regex);
	if (status != 0)
		return status;
	assert(regex != NULL);

	/* The translated pattern is only needed for compilation.  */
	status = regcomp(preg, regex, cflags);
	free(regex);
	return status;
}
193
194#ifdef TEST
Petr Machata1bbfbc62012-03-31 02:00:00 +0200195#include <stdio.h>
196
/* Test helper: translate GLOB and compare the outcome with what is
 * expected.
 *
 * EXP_STATUS is the status that glob_to_regex should return.  When
 * it is 0, EXPECT is the regular expression that should be produced;
 * otherwise EXPECT is not used.  Mismatches are reported on stderr;
 * the function itself does not abort on them.  */
static void
translate(const char *glob, int exp_status, const char *expect)
{
	char *pattern = NULL;
	int status = glob_to_regex(glob, &pattern);
	if (status != exp_status) {
		fprintf(stderr, "translating %s, expected status %d, got %d\n",
			glob, exp_status, status);
		/* If the translation unexpectedly succeeded, PATTERN
		 * holds an allocated string; free(NULL) is harmless
		 * otherwise.  */
		free(pattern);
		return;
	}

	if (status == 0) {
		assert(pattern != NULL);
		if (strcmp(pattern, expect) != 0)
			fprintf(stderr, "translating %s, expected %s, got %s\n",
				glob, expect, pattern);
		free(pattern);
	} else {
		/* A failed translation must not hand out a buffer.  */
		assert(pattern == NULL);
	}
}
218
/* Test helper: compile GLOB with no compilation flags, run it
 * against STR and assert that regexec returns EXPECT (0 for a match,
 * REG_NOMATCH for no match).  Compilation itself is asserted to
 * succeed.  */
static void
try_match(const char *glob, const char *str, int expect)
{
	regex_t preg;
	int status = globcomp(&preg, glob, 0);
	assert(status == 0);

	/* No sub-match information is requested.  */
	status = regexec(&preg, str, 0, NULL, 0);
	assert(status == expect);
	regfree(&preg);
}
229
/* Self-test entry point, built only when TEST is defined.  Checks
 * the glob-to-regex translation on valid patterns, then the error
 * codes for malformed ones, and finally does a few end-to-end
 * matches through globcomp and regexec.  */
int
main(void)
{
	/* Wildcards and literal dots.  */
	translate("*", 0, ".*");
	translate("?", 0, ".");
	translate(".*", 0, "\\..*");
	translate("*.*", 0, ".*\\..*");
	translate("*a*", 0, ".*a.*");

	/* Bracket expressions, including complement and classes.  */
	translate("[abc]", 0, "[abc]");
	translate("[^abc]", 0, "[^abc]");
	translate("[!abc]", 0, "[^abc]");
	translate("[]]", 0, "[]]");
	translate("[[]", 0, "[[]");
	translate("[^]]", 0, "[^]]");
	translate("[^a-z]", 0, "[^a-z]");
	translate("[abc\\]]", 0, "[abc\\]]");
	translate("[abc\\]def]", 0, "[abc\\]def]");
	translate("[[:space:]]", 0, "[[:space:]]");
	translate("[^[:space:]]", 0, "[^[:space:]]");
	translate("[![:space:]]", 0, "[^[:space:]]");
	translate("[^a-z]*", 0, "[^a-z].*");
	translate("[^a-z]bar*", 0, "[^a-z]bar.*");

	/* Long enough to force the output buffer to grow repeatedly.  */
	translate("*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.", 0,
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\."
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\.");

	/* Malformed patterns.  */
	translate("\\", REG_EESCAPE, NULL);
	translate("[^[:naotuh\\", REG_EBRACK, NULL);
	translate("[^[:", REG_EBRACK, NULL);
	translate("[^[", REG_EBRACK, NULL);
	translate("[^", REG_EBRACK, NULL);
	translate("[\\", REG_EBRACK, NULL);
	translate("[", REG_EBRACK, NULL);
	translate("abc[", REG_EBRACK, NULL);

	/* End-to-end matching.  */
	try_match("abc*def", "abc012def", 0);
	try_match("abc*def", "ab012def", REG_NOMATCH);
	try_match("[abc]*def", "a1def", 0);
	try_match("[abc]*def", "b1def", 0);
	try_match("[abc]*def", "d1def", REG_NOMATCH);

	return 0;
}
273
274#endif