blob: 6c4de9002e4c53d5954a8e833f82737f321bccd9 [file] [log] [blame]
Daniel Veillard4255d502002-04-16 15:50:10 +00001/*
2 * testRegexp.c: simple module for testing regular expressions
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel Veillard <veillard@redhat.com>
7 */
8
#include "libxml.h"
#ifdef LIBXML_AUTOMATA_ENABLED
#include <limits.h>
#include <string.h>

#include <libxml/tree.h>
#include <libxml/xmlautomata.h>
15
/*
 * scanNumber:
 * @ptr:  in/out cursor into a NUL-terminated string
 *
 * Parse the run of ASCII decimal digits starting at *ptr and advance *ptr
 * to the first character that is not a digit.
 *
 * Returns the parsed value, 0 when *ptr does not start with a digit (the
 * cursor is then left unchanged), or INT_MAX when the digits describe a
 * value that does not fit in an int (all the digits are still consumed).
 */
static int scanNumber(char **ptr) {
    int ret = 0;
    char *cur = *ptr;

    while ((*cur >= '0') && (*cur <= '9')) {
        int digit = *cur - '0';

        /*
         * Signed overflow is undefined behaviour: saturate instead of
         * computing ret * 10 + digit when it would exceed INT_MAX.
         */
        if (ret > (INT_MAX - digit) / 10)
            ret = INT_MAX;
        else
            ret = ret * 10 + digit;
        cur++;
    }
    *ptr = cur;
    return(ret);
}
28
29static void
30testRegexpFile(const char *filename) {
31 FILE *input;
William M. Brack272693c2003-11-14 16:20:34 +000032 char expr[5000];
Daniel Veillard4255d502002-04-16 15:50:10 +000033 int len;
34 int ret;
35 int i;
36 xmlAutomataPtr am;
37 xmlAutomataStatePtr states[1000];
38 xmlRegexpPtr regexp = NULL;
Daniel Veillard118aed72002-09-24 14:13:13 +000039 xmlRegExecCtxtPtr exec = NULL;
Daniel Veillard4255d502002-04-16 15:50:10 +000040
41 for (i = 0;i<1000;i++)
42 states[i] = NULL;
43
44 input = fopen(filename, "r");
45 if (input == NULL) {
46 xmlGenericError(xmlGenericErrorContext,
47 "Cannot open %s for reading\n", filename);
48 return;
49 }
50
51 am = xmlNewAutomata();
52 if (am == NULL) {
53 xmlGenericError(xmlGenericErrorContext,
54 "Cannot create automata\n");
55 fclose(input);
Daniel Veillard11ce4002006-03-10 00:36:23 +000056 return;
Daniel Veillard4255d502002-04-16 15:50:10 +000057 }
58 states[0] = xmlAutomataGetInitState(am);
59 if (states[0] == NULL) {
60 xmlGenericError(xmlGenericErrorContext,
61 "Cannot get start state\n");
62 xmlFreeAutomata(am);
63 fclose(input);
Daniel Veillard11ce4002006-03-10 00:36:23 +000064 return;
Daniel Veillard4255d502002-04-16 15:50:10 +000065 }
66 ret = 0;
67
William M. Brack272693c2003-11-14 16:20:34 +000068 while (fgets(expr, 4500, input) != NULL) {
69 if (expr[0] == '#')
Daniel Veillard4255d502002-04-16 15:50:10 +000070 continue;
William M. Brack272693c2003-11-14 16:20:34 +000071 len = strlen(expr);
Daniel Veillard4255d502002-04-16 15:50:10 +000072 len--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080073 while ((len >= 0) &&
William M. Brack272693c2003-11-14 16:20:34 +000074 ((expr[len] == '\n') || (expr[len] == '\t') ||
75 (expr[len] == '\r') || (expr[len] == ' '))) len--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080076 expr[len + 1] = 0;
Daniel Veillard4255d502002-04-16 15:50:10 +000077 if (len >= 0) {
William M. Brack272693c2003-11-14 16:20:34 +000078 if ((am != NULL) && (expr[0] == 't') && (expr[1] == ' ')) {
79 char *ptr = &expr[2];
Daniel Veillard4255d502002-04-16 15:50:10 +000080 int from, to;
81
82 from = scanNumber(&ptr);
83 if (*ptr != ' ') {
84 xmlGenericError(xmlGenericErrorContext,
William M. Brack272693c2003-11-14 16:20:34 +000085 "Bad line %s\n", expr);
Daniel Veillard4255d502002-04-16 15:50:10 +000086 break;
87 }
88 if (states[from] == NULL)
89 states[from] = xmlAutomataNewState(am);
90 ptr++;
91 to = scanNumber(&ptr);
92 if (*ptr != ' ') {
93 xmlGenericError(xmlGenericErrorContext,
William M. Brack272693c2003-11-14 16:20:34 +000094 "Bad line %s\n", expr);
Daniel Veillard4255d502002-04-16 15:50:10 +000095 break;
96 }
97 if (states[to] == NULL)
98 states[to] = xmlAutomataNewState(am);
99 ptr++;
100 xmlAutomataNewTransition(am, states[from], states[to],
101 BAD_CAST ptr, NULL);
William M. Brack272693c2003-11-14 16:20:34 +0000102 } else if ((am != NULL) && (expr[0] == 'e') && (expr[1] == ' ')) {
103 char *ptr = &expr[2];
Daniel Veillard4255d502002-04-16 15:50:10 +0000104 int from, to;
105
106 from = scanNumber(&ptr);
107 if (*ptr != ' ') {
108 xmlGenericError(xmlGenericErrorContext,
William M. Brack272693c2003-11-14 16:20:34 +0000109 "Bad line %s\n", expr);
Daniel Veillard4255d502002-04-16 15:50:10 +0000110 break;
111 }
112 if (states[from] == NULL)
113 states[from] = xmlAutomataNewState(am);
114 ptr++;
115 to = scanNumber(&ptr);
116 if (states[to] == NULL)
117 states[to] = xmlAutomataNewState(am);
118 xmlAutomataNewEpsilon(am, states[from], states[to]);
William M. Brack272693c2003-11-14 16:20:34 +0000119 } else if ((am != NULL) && (expr[0] == 'f') && (expr[1] == ' ')) {
120 char *ptr = &expr[2];
Daniel Veillard4255d502002-04-16 15:50:10 +0000121 int state;
122
123 state = scanNumber(&ptr);
124 if (states[state] == NULL) {
125 xmlGenericError(xmlGenericErrorContext,
William M. Brack272693c2003-11-14 16:20:34 +0000126 "Bad state %d : %s\n", state, expr);
Daniel Veillard4255d502002-04-16 15:50:10 +0000127 break;
128 }
129 xmlAutomataSetFinalState(am, states[state]);
William M. Brack272693c2003-11-14 16:20:34 +0000130 } else if ((am != NULL) && (expr[0] == 'c') && (expr[1] == ' ')) {
131 char *ptr = &expr[2];
Daniel Veillard4255d502002-04-16 15:50:10 +0000132 int from, to;
133 int min, max;
134
135 from = scanNumber(&ptr);
136 if (*ptr != ' ') {
137 xmlGenericError(xmlGenericErrorContext,
William M. Brack272693c2003-11-14 16:20:34 +0000138 "Bad line %s\n", expr);
Daniel Veillard4255d502002-04-16 15:50:10 +0000139 break;
140 }
141 if (states[from] == NULL)
142 states[from] = xmlAutomataNewState(am);
143 ptr++;
144 to = scanNumber(&ptr);
145 if (*ptr != ' ') {
146 xmlGenericError(xmlGenericErrorContext,
William M. Brack272693c2003-11-14 16:20:34 +0000147 "Bad line %s\n", expr);
Daniel Veillard4255d502002-04-16 15:50:10 +0000148 break;
149 }
150 if (states[to] == NULL)
151 states[to] = xmlAutomataNewState(am);
152 ptr++;
153 min = scanNumber(&ptr);
154 if (*ptr != ' ') {
155 xmlGenericError(xmlGenericErrorContext,
William M. Brack272693c2003-11-14 16:20:34 +0000156 "Bad line %s\n", expr);
Daniel Veillard4255d502002-04-16 15:50:10 +0000157 break;
158 }
159 ptr++;
160 max = scanNumber(&ptr);
161 if (*ptr != ' ') {
162 xmlGenericError(xmlGenericErrorContext,
William M. Brack272693c2003-11-14 16:20:34 +0000163 "Bad line %s\n", expr);
Daniel Veillard4255d502002-04-16 15:50:10 +0000164 break;
165 }
166 ptr++;
167 xmlAutomataNewCountTrans(am, states[from], states[to],
168 BAD_CAST ptr, min, max, NULL);
William M. Brack272693c2003-11-14 16:20:34 +0000169 } else if ((am != NULL) && (expr[0] == '-') && (expr[1] == '-')) {
Daniel Veillard4255d502002-04-16 15:50:10 +0000170 /* end of the automata */
171 regexp = xmlAutomataCompile(am);
172 xmlFreeAutomata(am);
173 am = NULL;
174 if (regexp == NULL) {
175 xmlGenericError(xmlGenericErrorContext,
176 "Failed to compile the automata");
177 break;
178 }
William M. Brack272693c2003-11-14 16:20:34 +0000179 } else if ((expr[0] == '=') && (expr[1] == '>')) {
Daniel Veillard4255d502002-04-16 15:50:10 +0000180 if (regexp == NULL) {
181 printf("=> failed not compiled\n");
182 } else {
183 if (exec == NULL)
184 exec = xmlRegNewExecCtxt(regexp, NULL, NULL);
185 if (ret == 0) {
186 ret = xmlRegExecPushString(exec, NULL, NULL);
187 }
188 if (ret == 1)
189 printf("=> Passed\n");
190 else if ((ret == 0) || (ret == -1))
191 printf("=> Failed\n");
192 else if (ret < 0)
193 printf("=> Error\n");
194 xmlRegFreeExecCtxt(exec);
195 exec = NULL;
196 }
197 ret = 0;
198 } else if (regexp != NULL) {
199 if (exec == NULL)
200 exec = xmlRegNewExecCtxt(regexp, NULL, NULL);
William M. Brack272693c2003-11-14 16:20:34 +0000201 ret = xmlRegExecPushString(exec, BAD_CAST expr, NULL);
Daniel Veillard4255d502002-04-16 15:50:10 +0000202 } else {
203 xmlGenericError(xmlGenericErrorContext,
William M. Brack272693c2003-11-14 16:20:34 +0000204 "Unexpected line %s\n", expr);
Daniel Veillard4255d502002-04-16 15:50:10 +0000205 }
206 }
207 }
208 fclose(input);
209 if (regexp != NULL)
210 xmlRegFreeRegexp(regexp);
211 if (exec != NULL)
212 xmlRegFreeExecCtxt(exec);
213 if (am != NULL)
214 xmlFreeAutomata(am);
215}
216
217int main(int argc, char **argv) {
218
219 xmlInitMemory();
220
221 if (argc == 1) {
222 int ret;
223 xmlAutomataPtr am;
224 xmlAutomataStatePtr start, cur;
225 xmlRegexpPtr regexp;
226 xmlRegExecCtxtPtr exec;
227
228 am = xmlNewAutomata();
229 start = xmlAutomataGetInitState(am);
230
231 /* generate a[ba]*a */
232 cur = xmlAutomataNewTransition(am, start, NULL, BAD_CAST"a", NULL);
233 xmlAutomataNewTransition(am, cur, cur, BAD_CAST"b", NULL);
234 xmlAutomataNewTransition(am, cur, cur, BAD_CAST"a", NULL);
235 cur = xmlAutomataNewCountTrans(am, cur, NULL, BAD_CAST"a", 2, 3, NULL);
236 xmlAutomataSetFinalState(am, cur);
237
238 /* compile it in a regexp and free the automata */
239 regexp = xmlAutomataCompile(am);
240 xmlFreeAutomata(am);
241
242 /* test the regexp */
243 xmlRegexpPrint(stdout, regexp);
244 exec = xmlRegNewExecCtxt(regexp, NULL, NULL);
245 ret = xmlRegExecPushString(exec, BAD_CAST"a", NULL);
246 if (ret == 1)
247 printf("final\n");
248 else if (ret < 0)
249 printf("error\n");
250 ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
251 if (ret == 1)
252 printf("final\n");
253 else if (ret < 0)
254 printf("error\n");
255 ret =xmlRegExecPushString(exec, BAD_CAST"b", NULL);
256 if (ret == 1)
257 printf("final\n");
258 else if (ret < 0)
259 printf("error\n");
260 ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
261 if (ret == 1)
262 printf("final\n");
263 else if (ret < 0)
264 printf("error\n");
265 ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
266 if (ret == 1)
267 printf("final\n");
268 else if (ret < 0)
269 printf("error\n");
270 ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
271 if (ret == 1)
272 printf("final\n");
273 else if (ret < 0)
274 printf("error\n");
275 ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
276 if (ret == 1)
277 printf("final\n");
278 else if (ret < 0)
279 printf("error\n");
280 if (ret == 0) {
281 ret = xmlRegExecPushString(exec, NULL, NULL);
282 if (ret == 1)
283 printf("final\n");
284 else if (ret < 0)
285 printf("error\n");
286 }
287 xmlRegFreeExecCtxt(exec);
288
289 /* free the regexp */
290 xmlRegFreeRegexp(regexp);
291 } else {
292 int i;
293
294 for (i = 1;i < argc;i++)
295 testRegexpFile(argv[i]);
296 }
297
298 xmlCleanupParser();
299 xmlMemoryDump();
300 return(0);
301}
302
303#else
304#include <stdio.h>
/*
 * Fallback entry point compiled when LIBXML_AUTOMATA_ENABLED is not defined:
 * it only reports that automata support is missing.
 *
 * Always returns 0.
 */
int main(int argc, char **argv) {
    /* argv[0] may be NULL when argc is 0: never hand a NULL pointer to %s */
    const char *prog = ((argc > 0) && (argv[0] != NULL)) ?
                       argv[0] : "(unknown)";

    printf("%s : Automata support not compiled in\n", prog);
    return(0);
}
309#endif /* LIBXML_AUTOMATA_ENABLED */