blob: c61273ff0fb4c16dd7075f0985b360fb0a33a0dd [file] [log] [blame]
sewardjd7a02db2008-12-12 08:07:49 +00001
2/*--------------------------------------------------------------------*/
3/*--- A simple sequence matching facility. ---*/
4/*--- m_seqmatch.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
sewardj9eecbbb2010-05-03 21:37:12 +000011 Copyright (C) 2008-2010 OpenWorks Ltd
sewardjd7a02db2008-12-12 08:07:49 +000012 info@open-works.co.uk
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_core_basics.h"
33#include "pub_core_libcassert.h"
34#include "pub_core_libcbase.h" // VG_(strlen)
35#include "pub_core_seqmatch.h" // self
36
37/* ---------------------------------------------------------------------
38 A simple sequence matching facility
39 ------------------------------------------------------------------ */
40
41/* See detailed comment in include/pub_tool_seqmatch.h about this. */
42Bool VG_(generic_match) (
43 Bool matchAll,
44 void* patt, SizeT szbPatt, UWord nPatt, UWord ixPatt,
45 void* input, SizeT szbInput, UWord nInput, UWord ixInput,
46 Bool (*pIsStar)(void*),
47 Bool (*pIsQuery)(void*),
48 Bool (*pattEQinp)(void*,void*)
49 )
50{
51 /* This is the spec, written in my favourite formal specification
52 language. It specifies non-greedy matching of '*'s.
53
54 ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
55 ma ('*':ps) [] = ma ps []
56
57 ma ('?':ps) (i:is) = ma ps is
58 ma ('?':ps) [] = False
59
60 ma (p:ps) (i:is) = p == i && ma ps is
61
62 ma (p:ps) [] = False
63 ma [] (i:is) = False -- m-all, True for m-prefix
64 ma [] [] = True
65 */
66 Bool havePatt, haveInput;
67 void *currPatt, *currInput;
68 tailcall:
69 vg_assert(nPatt >= 0 && nPatt < 1000000); /* arbitrary */
70 vg_assert(nInput >= 0 && nInput < 1000000); /* arbitrary */
71 vg_assert(ixPatt >= 0 && ixPatt <= nPatt);
72 vg_assert(ixInput >= 0 && ixInput <= nInput);
73
74 havePatt = ixPatt < nPatt;
75 haveInput = ixInput < nInput;
76
77 /* No specific need to set NULL when !have{Patt,Input}, but guards
78 against inadvertantly dereferencing an out of range pointer to
79 the pattern or input arrays. */
80 currPatt = havePatt ? ((Char*)patt) + szbPatt * ixPatt : NULL;
81 currInput = haveInput ? ((Char*)input) + szbInput * ixInput : NULL;
82
83 // Deal with the complex case first: wildcards. Do frugal
84 // matching. When encountering a '*', first skip no characters
85 // at all, and see if the rest of the match still works. Only if
86 // that fails do we then skip a character, and retry at the next
87 // position.
88 //
89 // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
90 //
91 // If we're out of input, check the rest of the pattern matches
92 // the empty input. This really means can only be be empty or
93 // composed entirely of '*'s.
94 //
95 // ma ('*':ps) [] = ma ps []
96 //
97 if (havePatt && pIsStar(currPatt)) {
98 if (haveInput) {
99 // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
100 // we unavoidably have to make a real recursive call for the
101 // first half of the OR, since this isn't straight tail-recursion.
102 if (VG_(generic_match)( matchAll,
103 patt, szbPatt, nPatt, ixPatt+1,
104 input,szbInput,nInput, ixInput+0,
105 pIsStar,pIsQuery,pattEQinp) ) {
106 return True;
107 }
108 // but we can tail-recurse for the second call
109 ixInput++; goto tailcall;
110 } else {
111 // ma ('*':ps) [] = ma ps []
112 ixPatt++; goto tailcall;
113 }
114 }
115
116 // simpler cases now. Deal with '?' wildcards.
117 //
118 // ma ('?':ps) (i:is) = ma ps is
119 // ma ('?':ps) [] = False
120 if (havePatt && pIsQuery(currPatt)) {
121 if (haveInput) {
122 ixPatt++; ixInput++; goto tailcall;
123 } else {
124 return False;
125 }
126 }
127
128 // obvious case with literal chars in the pattern
129 //
130 // ma (p:ps) (i:is) = p == i && ma ps is
131 if (havePatt && haveInput) {
132 if (!pattEQinp(currPatt,currInput)) return False;
133 ixPatt++; ixInput++; goto tailcall;
134 }
135
136 // if we run out of input before we run out of pattern, we must fail
137 // ma (p:ps) [] = False
138 if (havePatt && !haveInput) return False;
139
140 // if we run out of pattern before we run out of input, the
141 // verdict depends on the matching mode. If we are trying to
142 // match exactly (the pattern must consume the entire input)
143 // then the outcome is failure. However, if we're merely attempting
144 // to match some prefix of the input, then we have been successful.
145 //
146 // ma [] (i:is) = False -- m-all, True for m-prefix
147 if (!havePatt && haveInput) {
148 return matchAll ? False // match-all
149 : True; // match-prefix
150 }
151
152 // finally, if both sequence and input are both completely
153 // consumed, then we were successful, regardless of matching mode.
154 if (!havePatt && !haveInput) return True;
155
156 // end of cases
157 vg_assert(0);
158}
159
160
161/* And a parameterization of the above, to make it do
162 string matching.
163*/
164static Bool charIsStar ( void* pV ) { return *(Char*)pV == '*'; }
165static Bool charIsQuery ( void* pV ) { return *(Char*)pV == '?'; }
166static Bool char_p_EQ_i ( void* pV, void* cV ) {
167 Char p = *(Char*)pV;
168 Char c = *(Char*)cV;
169 vg_assert(p != '*' && p != '?');
170 return p == c;
171}
172Bool VG_(string_match) ( const Char* patt, const Char* input )
173{
174 return VG_(generic_match)(
175 True/* match-all */,
176 (void*)patt, sizeof(UChar), VG_(strlen)(patt), 0,
177 (void*)input, sizeof(UChar), VG_(strlen)(input), 0,
178 charIsStar, charIsQuery, char_p_EQ_i
179 );
180}
181
182
183// test cases for the matcher (in match-all mode)
184// typedef struct { char* patt; char* input; Bool xres; } Test;
185//
186//static Test tests[] =
187// {
188// { "" ,"" , True },
189// { "a" ,"" , False },
190// { "a" ,"b" , False },
191// { "a" ,"a" , True },
192// { "a" ,"aa" , False },
193// { "*" ,"" , True },
194// { "**" ,"" , True },
195// { "*" ,"abc", True },
196// { "*a" ,"abc", False },
197// { "*b" ,"abc", False },
198// { "*bc" ,"abc", True },
199// { "a*b" ,"abc", False },
200// { "a*c" ,"abc", True },
201// { "*c" ,"abc", True },
202// { "c*c" ,"abc", False },
203// { "abc*" ,"abc", True },
204// { "abc**" ,"abc", True },
205// { "**abc" ,"abc", True },
206// { "**a*b*c**" ,"abc", True },
207// { "**a*b*d**" ,"abc", False },
208// { "a?b" ,"abc", False },
209// { "a?c" ,"abc", True },
210// { "?" ,"" , False },
211// { "?" ,"a" , True },
212// { "?" ,"ab" , False },
213// { "abcd" ,"abc", False },
214// { "ab" ,"abc", False },
215// { NULL ,NULL , False }
216// };
217//
218//int main ( void )
219//{
220// Test* t;
221// for (t = tests; t->patt; t++) {
222// printf("%10s %6s %s\n",
223// t->patt, t->input,
224// match_string_all((UChar*)t->patt,(UChar*)t->input,True)
225// == t->xres
226// ? "pass" : "FAIL" );
227// }
228// return 0;
229//}
230
231/*--------------------------------------------------------------------*/
232/*--- end m_seqmatch.c ---*/
233/*--------------------------------------------------------------------*/