Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 1 | /* |
| 2 | * testRegexp.c: simple module for testing regular expressions |
| 3 | * |
| 4 | * See Copyright for the status of this software. |
| 5 | * |
| 6 | * Daniel Veillard <veillard@redhat.com> |
| 7 | */ |
| 8 | |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 9 | #include "libxml.h" |
| 10 | #ifdef LIBXML_REGEXP_ENABLED |
William M. Brack | c1099be | 2007-01-31 18:38:56 +0000 | [diff] [blame] | 11 | #include <string.h> |
| 12 | |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 13 | #include <libxml/tree.h> |
| 14 | #include <libxml/xmlregexp.h> |
| 15 | |
/* Set by -repeat/--repeat: testRegexp() then re-executes every match in a loop. */
static int repeat = 0;

/* Set by -debug/--debug: main() then dumps the compiled regexp/expression. */
static int debug = 0;
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 18 | |
| 19 | static void testRegexp(xmlRegexpPtr comp, const char *value) { |
| 20 | int ret; |
| 21 | |
| 22 | ret = xmlRegexpExec(comp, (const xmlChar *) value); |
| 23 | if (ret == 1) |
| 24 | printf("%s: Ok\n", value); |
| 25 | else if (ret == 0) |
| 26 | printf("%s: Fail\n", value); |
| 27 | else |
| 28 | printf("%s: Error: %d\n", value, ret); |
| 29 | if (repeat) { |
| 30 | int j; |
| 31 | for (j = 0;j < 999999;j++) |
| 32 | xmlRegexpExec(comp, (const xmlChar *) value); |
| 33 | } |
| 34 | } |
| 35 | |
| 36 | static void |
| 37 | testRegexpFile(const char *filename) { |
| 38 | xmlRegexpPtr comp = NULL; |
| 39 | FILE *input; |
| 40 | char expression[5000]; |
| 41 | int len; |
| 42 | |
| 43 | input = fopen(filename, "r"); |
| 44 | if (input == NULL) { |
| 45 | xmlGenericError(xmlGenericErrorContext, |
| 46 | "Cannot open %s for reading\n", filename); |
| 47 | return; |
| 48 | } |
| 49 | while (fgets(expression, 4500, input) != NULL) { |
| 50 | len = strlen(expression); |
| 51 | len--; |
| 52 | while ((len >= 0) && |
| 53 | ((expression[len] == '\n') || (expression[len] == '\t') || |
| 54 | (expression[len] == '\r') || (expression[len] == ' '))) len--; |
| 55 | expression[len + 1] = 0; |
| 56 | if (len >= 0) { |
| 57 | if (expression[0] == '#') |
| 58 | continue; |
| 59 | if ((expression[0] == '=') && (expression[1] == '>')) { |
| 60 | char *pattern = &expression[2]; |
| 61 | |
| 62 | if (comp != NULL) { |
| 63 | xmlRegFreeRegexp(comp); |
| 64 | comp = NULL; |
| 65 | } |
| 66 | printf("Regexp: %s\n", pattern) ; |
| 67 | comp = xmlRegexpCompile((const xmlChar *) pattern); |
| 68 | if (comp == NULL) { |
| 69 | printf(" failed to compile\n"); |
| 70 | break; |
| 71 | } |
| 72 | } else if (comp == NULL) { |
| 73 | printf("Regexp: %s\n", expression) ; |
| 74 | comp = xmlRegexpCompile((const xmlChar *) expression); |
| 75 | if (comp == NULL) { |
| 76 | printf(" failed to compile\n"); |
| 77 | break; |
| 78 | } |
| 79 | } else if (comp != NULL) { |
| 80 | testRegexp(comp, expression); |
| 81 | } |
| 82 | } |
| 83 | } |
| 84 | fclose(input); |
| 85 | if (comp != NULL) |
| 86 | xmlRegFreeRegexp(comp); |
| 87 | } |
| 88 | |
Daniel Veillard | 465a000 | 2005-08-22 12:07:04 +0000 | [diff] [blame] | 89 | #ifdef LIBXML_EXPR_ENABLED |
| 90 | static void |
| 91 | runFileTest(xmlExpCtxtPtr ctxt, const char *filename) { |
| 92 | xmlExpNodePtr expr = NULL, sub; |
| 93 | FILE *input; |
| 94 | char expression[5000]; |
| 95 | int len; |
| 96 | |
| 97 | input = fopen(filename, "r"); |
| 98 | if (input == NULL) { |
| 99 | xmlGenericError(xmlGenericErrorContext, |
| 100 | "Cannot open %s for reading\n", filename); |
| 101 | return; |
| 102 | } |
| 103 | while (fgets(expression, 4500, input) != NULL) { |
| 104 | len = strlen(expression); |
| 105 | len--; |
| 106 | while ((len >= 0) && |
| 107 | ((expression[len] == '\n') || (expression[len] == '\t') || |
| 108 | (expression[len] == '\r') || (expression[len] == ' '))) len--; |
| 109 | expression[len + 1] = 0; |
| 110 | if (len >= 0) { |
| 111 | if (expression[0] == '#') |
| 112 | continue; |
| 113 | if ((expression[0] == '=') && (expression[1] == '>')) { |
| 114 | char *str = &expression[2]; |
| 115 | |
| 116 | if (expr != NULL) { |
| 117 | xmlExpFree(ctxt, expr); |
| 118 | if (xmlExpCtxtNbNodes(ctxt) != 0) |
| 119 | printf(" Parse/free of Expression leaked %d\n", |
| 120 | xmlExpCtxtNbNodes(ctxt)); |
| 121 | expr = NULL; |
| 122 | } |
| 123 | printf("Expression: %s\n", str) ; |
| 124 | expr = xmlExpParse(ctxt, str); |
| 125 | if (expr == NULL) { |
| 126 | printf(" parsing Failed\n"); |
| 127 | break; |
| 128 | } |
| 129 | } else if (expr != NULL) { |
| 130 | int expect = -1; |
| 131 | int nodes1, nodes2; |
| 132 | |
| 133 | if (expression[0] == '0') |
| 134 | expect = 0; |
| 135 | if (expression[0] == '1') |
| 136 | expect = 1; |
| 137 | printf("Subexp: %s", expression + 2) ; |
| 138 | nodes1 = xmlExpCtxtNbNodes(ctxt); |
| 139 | sub = xmlExpParse(ctxt, expression + 2); |
| 140 | if (sub == NULL) { |
| 141 | printf(" parsing Failed\n"); |
| 142 | break; |
| 143 | } else { |
| 144 | int ret; |
| 145 | |
| 146 | nodes2 = xmlExpCtxtNbNodes(ctxt); |
| 147 | ret = xmlExpSubsume(ctxt, expr, sub); |
| 148 | |
| 149 | if ((expect == 1) && (ret == 1)) { |
| 150 | printf(" => accept, Ok\n"); |
| 151 | } else if ((expect == 0) && (ret == 0)) { |
| 152 | printf(" => reject, Ok\n"); |
| 153 | } else if ((expect == 1) && (ret == 0)) { |
| 154 | printf(" => reject, Failed\n"); |
| 155 | } else if ((expect == 0) && (ret == 1)) { |
| 156 | printf(" => accept, Failed\n"); |
| 157 | } else { |
| 158 | printf(" => fail internally\n"); |
| 159 | } |
| 160 | if (xmlExpCtxtNbNodes(ctxt) > nodes2) { |
| 161 | printf(" Subsume leaked %d\n", |
| 162 | xmlExpCtxtNbNodes(ctxt) - nodes2); |
| 163 | nodes1 += xmlExpCtxtNbNodes(ctxt) - nodes2; |
| 164 | } |
| 165 | xmlExpFree(ctxt, sub); |
| 166 | if (xmlExpCtxtNbNodes(ctxt) > nodes1) { |
| 167 | printf(" Parse/free leaked %d\n", |
| 168 | xmlExpCtxtNbNodes(ctxt) - nodes1); |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | } |
| 173 | } |
| 174 | } |
| 175 | if (expr != NULL) { |
| 176 | xmlExpFree(ctxt, expr); |
| 177 | if (xmlExpCtxtNbNodes(ctxt) != 0) |
| 178 | printf(" Parse/free of Expression leaked %d\n", |
| 179 | xmlExpCtxtNbNodes(ctxt)); |
| 180 | } |
| 181 | fclose(input); |
| 182 | } |
Daniel Veillard | 0090bd5 | 2005-08-22 14:43:43 +0000 | [diff] [blame] | 183 | |
| 184 | static void |
| 185 | testReduce(xmlExpCtxtPtr ctxt, xmlExpNodePtr expr, const char *tst) { |
| 186 | xmlBufferPtr xmlExpBuf; |
| 187 | xmlExpNodePtr sub, deriv; |
| 188 | xmlExpBuf = xmlBufferCreate(); |
| 189 | |
| 190 | sub = xmlExpParse(ctxt, tst); |
| 191 | if (sub == NULL) { |
| 192 | printf("Subset %s failed to parse\n", tst); |
| 193 | return; |
| 194 | } |
Daniel Veillard | a418122 | 2005-08-22 15:50:57 +0000 | [diff] [blame] | 195 | xmlExpDump(xmlExpBuf, sub); |
Daniel Veillard | 0090bd5 | 2005-08-22 14:43:43 +0000 | [diff] [blame] | 196 | printf("Subset parsed as: %s\n", |
| 197 | (const char *) xmlBufferContent(xmlExpBuf)); |
| 198 | deriv = xmlExpExpDerive(ctxt, expr, sub); |
| 199 | if (deriv == NULL) { |
| 200 | printf("Derivation led to an internal error, report this !\n"); |
| 201 | return; |
| 202 | } else { |
| 203 | xmlBufferEmpty(xmlExpBuf); |
| 204 | xmlExpDump(xmlExpBuf, deriv); |
| 205 | if (xmlExpIsNillable(deriv)) |
| 206 | printf("Resulting nillable derivation: %s\n", |
| 207 | (const char *) xmlBufferContent(xmlExpBuf)); |
| 208 | else |
| 209 | printf("Resulting derivation: %s\n", |
| 210 | (const char *) xmlBufferContent(xmlExpBuf)); |
| 211 | xmlExpFree(ctxt, deriv); |
| 212 | } |
| 213 | xmlExpFree(ctxt, sub); |
| 214 | } |
| 215 | |
| 216 | static void |
| 217 | exprDebug(xmlExpCtxtPtr ctxt, xmlExpNodePtr expr) { |
| 218 | xmlBufferPtr xmlExpBuf; |
| 219 | xmlExpNodePtr deriv; |
| 220 | const char *list[40]; |
| 221 | int ret; |
| 222 | |
| 223 | xmlExpBuf = xmlBufferCreate(); |
| 224 | |
| 225 | if (expr == NULL) { |
| 226 | printf("Failed to parse\n"); |
| 227 | return; |
| 228 | } |
| 229 | xmlExpDump(xmlExpBuf, expr); |
| 230 | printf("Parsed as: %s\n", (const char *) xmlBufferContent(xmlExpBuf)); |
| 231 | printf("Max token input = %d\n", xmlExpMaxToken(expr)); |
| 232 | if (xmlExpIsNillable(expr) == 1) |
| 233 | printf("Is nillable\n"); |
| 234 | ret = xmlExpGetLanguage(ctxt, expr, (const xmlChar **) &list[0], 40); |
| 235 | if (ret < 0) |
| 236 | printf("Failed to get list: %d\n", ret); |
| 237 | else { |
| 238 | int i; |
| 239 | |
| 240 | printf("Language has %d strings, testing string derivations\n", ret); |
| 241 | for (i = 0;i < ret;i++) { |
| 242 | deriv = xmlExpStringDerive(ctxt, expr, BAD_CAST list[i], -1); |
| 243 | if (deriv == NULL) { |
| 244 | printf(" %s -> derivation failed\n", list[i]); |
| 245 | } else { |
| 246 | xmlBufferEmpty(xmlExpBuf); |
| 247 | xmlExpDump(xmlExpBuf, deriv); |
| 248 | printf(" %s -> %s\n", list[i], |
| 249 | (const char *) xmlBufferContent(xmlExpBuf)); |
| 250 | } |
| 251 | xmlExpFree(ctxt, deriv); |
| 252 | } |
| 253 | } |
| 254 | xmlBufferFree(xmlExpBuf); |
| 255 | } |
Daniel Veillard | 465a000 | 2005-08-22 12:07:04 +0000 | [diff] [blame] | 256 | #endif |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 257 | |
/*
 * usage:
 * @name: the program name to show in the synopsis line
 *
 * Writes the command-line help to stderr. The --expr line is only
 * emitted when the library was built with LIBXML_EXPR_ENABLED.
 */
static void usage(const char *name) {
    FILE *out = stderr;

    fprintf(out, "Usage: %s [flags]\n", name);
    fputs("Testing tool for libxml2 string and pattern regexps\n", out);
    fputs(" --debug: switch on debugging\n", out);
    fputs(" --repeat: loop on the operation\n", out);
#ifdef LIBXML_EXPR_ENABLED
    fputs(" --expr: test xmlExp and not xmlRegexp\n", out);
#endif
    fputs(" --input filename: use the given filename for regexp\n", out);
    fputs(" --input filename: use the given filename for exp\n", out);
}
| 269 | |
| 270 | int main(int argc, char **argv) { |
| 271 | xmlRegexpPtr comp = NULL; |
Daniel Veillard | 465a000 | 2005-08-22 12:07:04 +0000 | [diff] [blame] | 272 | #ifdef LIBXML_EXPR_ENABLED |
| 273 | xmlExpNodePtr expr = NULL; |
| 274 | int use_exp = 0; |
| 275 | xmlExpCtxtPtr ctxt = NULL; |
| 276 | #endif |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 277 | const char *pattern = NULL; |
| 278 | char *filename = NULL; |
| 279 | int i; |
| 280 | |
| 281 | xmlInitMemory(); |
| 282 | |
| 283 | if (argc <= 1) { |
| 284 | usage(argv[0]); |
| 285 | return(1); |
| 286 | } |
| 287 | for (i = 1; i < argc ; i++) { |
| 288 | if (!strcmp(argv[i], "-")) |
| 289 | break; |
| 290 | |
| 291 | if (argv[i][0] != '-') |
| 292 | continue; |
Daniel Veillard | 3066351 | 2008-02-21 22:31:55 +0000 | [diff] [blame] | 293 | if (!strcmp(argv[i], "--")) |
| 294 | break; |
| 295 | |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 296 | if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug"))) { |
| 297 | debug++; |
| 298 | } else if ((!strcmp(argv[i], "-repeat")) || |
| 299 | (!strcmp(argv[i], "--repeat"))) { |
| 300 | repeat++; |
Daniel Veillard | 465a000 | 2005-08-22 12:07:04 +0000 | [diff] [blame] | 301 | #ifdef LIBXML_EXPR_ENABLED |
| 302 | } else if ((!strcmp(argv[i], "-expr")) || |
| 303 | (!strcmp(argv[i], "--expr"))) { |
| 304 | use_exp++; |
| 305 | #endif |
| 306 | } else if ((!strcmp(argv[i], "-i")) || (!strcmp(argv[i], "-f")) || |
| 307 | (!strcmp(argv[i], "--input"))) |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 308 | filename = argv[++i]; |
| 309 | else { |
| 310 | fprintf(stderr, "Unknown option %s\n", argv[i]); |
| 311 | usage(argv[0]); |
| 312 | } |
| 313 | } |
Daniel Veillard | 465a000 | 2005-08-22 12:07:04 +0000 | [diff] [blame] | 314 | |
| 315 | #ifdef LIBXML_EXPR_ENABLED |
| 316 | if (use_exp) |
| 317 | ctxt = xmlExpNewCtxt(0, NULL); |
| 318 | #endif |
| 319 | |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 320 | if (filename != NULL) { |
Daniel Veillard | 465a000 | 2005-08-22 12:07:04 +0000 | [diff] [blame] | 321 | #ifdef LIBXML_EXPR_ENABLED |
| 322 | if (use_exp) |
| 323 | runFileTest(ctxt, filename); |
| 324 | else |
| 325 | #endif |
| 326 | testRegexpFile(filename); |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 327 | } else { |
Daniel Veillard | 3066351 | 2008-02-21 22:31:55 +0000 | [diff] [blame] | 328 | int data = 0; |
Daniel Veillard | 0090bd5 | 2005-08-22 14:43:43 +0000 | [diff] [blame] | 329 | #ifdef LIBXML_EXPR_ENABLED |
Daniel Veillard | 3066351 | 2008-02-21 22:31:55 +0000 | [diff] [blame] | 330 | |
Daniel Veillard | 0090bd5 | 2005-08-22 14:43:43 +0000 | [diff] [blame] | 331 | if (use_exp) { |
| 332 | for (i = 1; i < argc ; i++) { |
Daniel Veillard | 3066351 | 2008-02-21 22:31:55 +0000 | [diff] [blame] | 333 | if (strcmp(argv[i], "--") == 0) |
| 334 | data = 1; |
| 335 | else if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0) || |
| 336 | (data == 1)) { |
Daniel Veillard | 0090bd5 | 2005-08-22 14:43:43 +0000 | [diff] [blame] | 337 | if (pattern == NULL) { |
| 338 | pattern = argv[i]; |
| 339 | printf("Testing expr %s:\n", pattern); |
| 340 | expr = xmlExpParse(ctxt, pattern); |
| 341 | if (expr == NULL) { |
| 342 | printf(" failed to compile\n"); |
| 343 | break; |
| 344 | } |
| 345 | if (debug) { |
| 346 | exprDebug(ctxt, expr); |
| 347 | } |
| 348 | } else { |
| 349 | testReduce(ctxt, expr, argv[i]); |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 350 | } |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 351 | } |
| 352 | } |
Daniel Veillard | 3066351 | 2008-02-21 22:31:55 +0000 | [diff] [blame] | 353 | if (expr != NULL) { |
Daniel Veillard | 0090bd5 | 2005-08-22 14:43:43 +0000 | [diff] [blame] | 354 | xmlExpFree(ctxt, expr); |
Daniel Veillard | 3066351 | 2008-02-21 22:31:55 +0000 | [diff] [blame] | 355 | expr = NULL; |
| 356 | } |
Daniel Veillard | 0090bd5 | 2005-08-22 14:43:43 +0000 | [diff] [blame] | 357 | } else |
| 358 | #endif |
| 359 | { |
| 360 | for (i = 1; i < argc ; i++) { |
Daniel Veillard | 3066351 | 2008-02-21 22:31:55 +0000 | [diff] [blame] | 361 | if (strcmp(argv[i], "--") == 0) |
| 362 | data = 1; |
| 363 | else if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0) || |
| 364 | (data == 1)) { |
Daniel Veillard | 0090bd5 | 2005-08-22 14:43:43 +0000 | [diff] [blame] | 365 | if (pattern == NULL) { |
| 366 | pattern = argv[i]; |
| 367 | printf("Testing %s:\n", pattern); |
| 368 | comp = xmlRegexpCompile((const xmlChar *) pattern); |
| 369 | if (comp == NULL) { |
| 370 | printf(" failed to compile\n"); |
| 371 | break; |
| 372 | } |
| 373 | if (debug) |
| 374 | xmlRegexpPrint(stdout, comp); |
| 375 | } else { |
| 376 | testRegexp(comp, argv[i]); |
| 377 | } |
| 378 | } |
| 379 | } |
| 380 | if (comp != NULL) |
| 381 | xmlRegFreeRegexp(comp); |
| 382 | } |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 383 | } |
Daniel Veillard | 465a000 | 2005-08-22 12:07:04 +0000 | [diff] [blame] | 384 | #ifdef LIBXML_EXPR_ENABLED |
| 385 | if (ctxt != NULL) { |
| 386 | printf("Ops: %d nodes, %d cons\n", |
| 387 | xmlExpCtxtNbNodes(ctxt), xmlExpCtxtNbCons(ctxt)); |
| 388 | xmlExpFreeCtxt(ctxt); |
| 389 | } |
| 390 | #endif |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 391 | xmlCleanupParser(); |
Daniel Veillard | 99c394d | 2005-07-14 12:58:49 +0000 | [diff] [blame] | 392 | xmlMemoryDump(); |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 393 | return(0); |
| 394 | } |
| 395 | |
| 396 | #else |
| 397 | #include <stdio.h> |
Daniel Veillard | a9cce9c | 2003-09-29 13:20:24 +0000 | [diff] [blame] | 398 | int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) { |
Daniel Veillard | 4255d50 | 2002-04-16 15:50:10 +0000 | [diff] [blame] | 399 | printf("%s : Regexp support not compiled in\n", argv[0]); |
| 400 | return(0); |
| 401 | } |
| 402 | #endif /* LIBXML_REGEXP_ENABLED */ |