/*
 * Summary: regular expressions handling
 * Description: basic API for libxml regular expressions handling used
 * for XML Schemas and validation.
 *
 * Copy: See Copyright for the status of this software.
 *
 * Author: Daniel Veillard
 */

11#ifndef __XML_REGEXP_H__
12#define __XML_REGEXP_H__
13
Daniel Veillard4255d502002-04-16 15:50:10 +000014#include <libxml/xmlversion.h>
Daniel Veillard4255d502002-04-16 15:50:10 +000015
Igor Zlatkovic7ae91bc2002-11-08 17:18:52 +000016#ifdef LIBXML_REGEXP_ENABLED
Daniel Veillard4255d502002-04-16 15:50:10 +000017
18#ifdef __cplusplus
19extern "C" {
20#endif
21
/**
 * xmlRegexpPtr:
 *
 * A libxml regular expression; they can actually be far more complex
 * than the POSIX regular expressions.
 *
 * The structure is only forward-declared in this header, so callers
 * manipulate it exclusively through xmlRegexpPtr.
 */
typedef struct _xmlRegexp xmlRegexp;
typedef xmlRegexp *xmlRegexpPtr;

/**
 * xmlRegExecCtxtPtr:
 *
 * A libxml progressive regular expression evaluation context.
 *
 * The structure is only forward-declared in this header, so callers
 * manipulate it exclusively through xmlRegExecCtxtPtr.
 */
typedef struct _xmlRegExecCtxt xmlRegExecCtxt;
typedef xmlRegExecCtxt *xmlRegExecCtxtPtr;

Daniel Veillard84d70a42002-09-16 10:51:38 +000039#ifdef __cplusplus
40}
41#endif
42#include <libxml/tree.h>
Daniel Veillard465a0002005-08-22 12:07:04 +000043#include <libxml/dict.h>
Daniel Veillard84d70a42002-09-16 10:51:38 +000044#ifdef __cplusplus
45extern "C" {
46#endif
47
Daniel Veillard4255d502002-04-16 15:50:10 +000048/*
49 * The POSIX like API
50 */
Igor Zlatkovic76874e42003-08-25 09:05:12 +000051XMLPUBFUN xmlRegexpPtr XMLCALL
Igor Zlatkovicaa3cfbd2003-08-27 08:59:58 +000052 xmlRegexpCompile (const xmlChar *regexp);
53XMLPUBFUN void XMLCALL xmlRegFreeRegexp(xmlRegexpPtr regexp);
54XMLPUBFUN int XMLCALL
55 xmlRegexpExec (xmlRegexpPtr comp,
Daniel Veillard4255d502002-04-16 15:50:10 +000056 const xmlChar *value);
Igor Zlatkovicaa3cfbd2003-08-27 08:59:58 +000057XMLPUBFUN void XMLCALL
58 xmlRegexpPrint (FILE *output,
Daniel Veillard4255d502002-04-16 15:50:10 +000059 xmlRegexpPtr regexp);
Igor Zlatkovicaa3cfbd2003-08-27 08:59:58 +000060XMLPUBFUN int XMLCALL
61 xmlRegexpIsDeterminist(xmlRegexpPtr comp);
Daniel Veillard4255d502002-04-16 15:50:10 +000062
63/*
64 * Callback function when doing a transition in the automata
65 */
66typedef void (*xmlRegExecCallbacks) (xmlRegExecCtxtPtr exec,
67 const xmlChar *token,
68 void *transdata,
69 void *inputdata);
70
71/*
72 * The progressive API
73 */
Igor Zlatkovicaa3cfbd2003-08-27 08:59:58 +000074XMLPUBFUN xmlRegExecCtxtPtr XMLCALL
75 xmlRegNewExecCtxt (xmlRegexpPtr comp,
76 xmlRegExecCallbacks callback,
77 void *data);
78XMLPUBFUN void XMLCALL
79 xmlRegFreeExecCtxt (xmlRegExecCtxtPtr exec);
80XMLPUBFUN int XMLCALL
81 xmlRegExecPushString(xmlRegExecCtxtPtr exec,
82 const xmlChar *value,
83 void *data);
84XMLPUBFUN int XMLCALL
85 xmlRegExecPushString2(xmlRegExecCtxtPtr exec,
86 const xmlChar *value,
87 const xmlChar *value2,
88 void *data);
Daniel Veillard4255d502002-04-16 15:50:10 +000089
Daniel Veillardfc0b6f62005-01-09 17:48:02 +000090XMLPUBFUN int XMLCALL
91 xmlRegExecNextValues(xmlRegExecCtxtPtr exec,
92 int *nbval,
Daniel Veillardcc026dc2005-01-12 13:21:17 +000093 int *nbneg,
Daniel Veillardfc0b6f62005-01-09 17:48:02 +000094 xmlChar **values,
95 int *terminal);
96XMLPUBFUN int XMLCALL
97 xmlRegExecErrInfo (xmlRegExecCtxtPtr exec,
98 const xmlChar **string,
99 int *nbval,
Daniel Veillardcc026dc2005-01-12 13:21:17 +0000100 int *nbneg,
Daniel Veillardfc0b6f62005-01-09 17:48:02 +0000101 xmlChar **values,
102 int *terminal);
Daniel Veillard81a8ec62005-08-22 00:20:58 +0000103#ifdef LIBXML_EXPR_ENABLED
104/*
105 * Formal regular expression handling
106 * Its goal is to do some formal work on content models
107 */
108
/*
 * Expressions are used within a context.  The context structure is only
 * forward-declared, so it is handled through xmlExpCtxtPtr.
 */
typedef struct _xmlExpCtxt xmlExpCtxt;
typedef xmlExpCtxt *xmlExpCtxtPtr;

113XMLPUBFUN void XMLCALL
114 xmlExpFreeCtxt (xmlExpCtxtPtr ctxt);
115XMLPUBFUN xmlExpCtxtPtr XMLCALL
116 xmlExpNewCtxt (int maxNodes,
117 xmlDictPtr dict);
118
Daniel Veillard465a0002005-08-22 12:07:04 +0000119XMLPUBFUN int XMLCALL
120 xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt);
121XMLPUBFUN int XMLCALL
122 xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt);
123
/*
 * Expressions are trees but the tree is opaque: the node structure is
 * only forward-declared and is handled through xmlExpNodePtr.
 */
typedef struct _xmlExpNode xmlExpNode;
typedef xmlExpNode *xmlExpNodePtr;

/*
 * Kinds of expression node.  The numeric values are written out
 * explicitly so they stay fixed even if the list is edited.
 * NOTE(review): the names appear to line up with emptyExp/forbiddenExp and
 * the xmlExpNewAtom/Seq/Or/Range constructors declared in this header --
 * confirm against the implementation.
 */
typedef enum {
    XML_EXP_EMPTY = 0,
    XML_EXP_FORBID = 1,
    XML_EXP_ATOM = 2,
    XML_EXP_SEQ = 3,
    XML_EXP_OR = 4,
    XML_EXP_COUNT = 5
} xmlExpNodeType;

Daniel Veillard81a8ec62005-08-22 00:20:58 +0000137/*
138 * 2 core expressions shared by all for the empty language set
139 * and for the set with just the empty token
140 */
141XMLPUBVAR xmlExpNodePtr forbiddenExp;
142XMLPUBVAR xmlExpNodePtr emptyExp;
143
144/*
145 * Expressions are reference counted internally
146 */
147XMLPUBFUN void XMLCALL
148 xmlExpFree (xmlExpCtxtPtr ctxt,
Daniel Veillard465a0002005-08-22 12:07:04 +0000149 xmlExpNodePtr expr);
Daniel Veillard81a8ec62005-08-22 00:20:58 +0000150XMLPUBFUN void XMLCALL
Daniel Veillard465a0002005-08-22 12:07:04 +0000151 xmlExpRef (xmlExpNodePtr expr);
152
153/*
154 * constructors can be either manual or from a string
155 */
156XMLPUBFUN xmlExpNodePtr XMLCALL
157 xmlExpParse (xmlExpCtxtPtr ctxt,
158 const char *expr);
Daniel Veillardccb4d412005-08-23 13:41:17 +0000159XMLPUBFUN xmlExpNodePtr XMLCALL
160 xmlExpNewAtom (xmlExpCtxtPtr ctxt,
161 const xmlChar *name,
162 int len);
163XMLPUBFUN xmlExpNodePtr XMLCALL
164 xmlExpNewOr (xmlExpCtxtPtr ctxt,
165 xmlExpNodePtr left,
166 xmlExpNodePtr right);
167XMLPUBFUN xmlExpNodePtr XMLCALL
168 xmlExpNewSeq (xmlExpCtxtPtr ctxt,
169 xmlExpNodePtr left,
170 xmlExpNodePtr right);
171XMLPUBFUN xmlExpNodePtr XMLCALL
172 xmlExpNewRange (xmlExpCtxtPtr ctxt,
173 xmlExpNodePtr subset,
174 int min,
175 int max);
Daniel Veillard465a0002005-08-22 12:07:04 +0000176/*
177 * The really interesting APIs
178 */
Daniel Veillard81a8ec62005-08-22 00:20:58 +0000179XMLPUBFUN int XMLCALL
Daniel Veillard465a0002005-08-22 12:07:04 +0000180 xmlExpIsNillable(xmlExpNodePtr expr);
181XMLPUBFUN int XMLCALL
182 xmlExpMaxToken (xmlExpNodePtr expr);
Daniel Veillard81a8ec62005-08-22 00:20:58 +0000183XMLPUBFUN int XMLCALL
184 xmlExpGetLanguage(xmlExpCtxtPtr ctxt,
Daniel Veillard465a0002005-08-22 12:07:04 +0000185 xmlExpNodePtr expr,
Daniel Veillard81a8ec62005-08-22 00:20:58 +0000186 const xmlChar**list,
187 int len);
188XMLPUBFUN int XMLCALL
189 xmlExpGetStart (xmlExpCtxtPtr ctxt,
Daniel Veillard465a0002005-08-22 12:07:04 +0000190 xmlExpNodePtr expr,
Daniel Veillard81a8ec62005-08-22 00:20:58 +0000191 const xmlChar**list,
192 int len);
193XMLPUBFUN xmlExpNodePtr XMLCALL
194 xmlExpStringDerive(xmlExpCtxtPtr ctxt,
Daniel Veillard465a0002005-08-22 12:07:04 +0000195 xmlExpNodePtr expr,
Daniel Veillard81a8ec62005-08-22 00:20:58 +0000196 const xmlChar *str,
197 int len);
Daniel Veillard0090bd52005-08-22 14:43:43 +0000198XMLPUBFUN xmlExpNodePtr XMLCALL
199 xmlExpExpDerive (xmlExpCtxtPtr ctxt,
Daniel Veillardbca3ad22005-08-23 22:14:02 +0000200 xmlExpNodePtr expr,
Daniel Veillard0090bd52005-08-22 14:43:43 +0000201 xmlExpNodePtr sub);
Daniel Veillard81a8ec62005-08-22 00:20:58 +0000202XMLPUBFUN int XMLCALL
203 xmlExpSubsume (xmlExpCtxtPtr ctxt,
Daniel Veillard465a0002005-08-22 12:07:04 +0000204 xmlExpNodePtr expr,
Daniel Veillard81a8ec62005-08-22 00:20:58 +0000205 xmlExpNodePtr sub);
Daniel Veillard465a0002005-08-22 12:07:04 +0000206XMLPUBFUN void XMLCALL
207 xmlExpDump (xmlBufferPtr buf,
Daniel Veillardbca3ad22005-08-23 22:14:02 +0000208 xmlExpNodePtr expr);
Daniel Veillard81a8ec62005-08-22 00:20:58 +0000209#endif /* LIBXML_EXPR_ENABLED */
Daniel Veillard4255d502002-04-16 15:50:10 +0000210#ifdef __cplusplus
211}
212#endif
213
214#endif /* LIBXML_REGEXP_ENABLED */
215
216#endif /*__XML_REGEXP_H__ */