blob: 9e0f4378fd5d8705763cde6fc7bd86586f6fc80c [file] [log] [blame]
sewardjd7a02db2008-12-12 08:07:49 +00001
2/*--------------------------------------------------------------------*/
3/*--- A simple sequence matching facility. ---*/
4/*--- m_seqmatch.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
Elliott Hughesed398002017-06-21 14:41:24 -070011 Copyright (C) 2008-2017 OpenWorks Ltd
sewardjd7a02db2008-12-12 08:07:49 +000012 info@open-works.co.uk
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_core_basics.h"
33#include "pub_core_libcassert.h"
34#include "pub_core_libcbase.h" // VG_(strlen)
35#include "pub_core_seqmatch.h" // self
36
37/* ---------------------------------------------------------------------
38 A simple sequence matching facility
39 ------------------------------------------------------------------ */
40
41/* See detailed comment in include/pub_tool_seqmatch.h about this. */
42Bool VG_(generic_match) (
43 Bool matchAll,
florian3e798632012-11-24 19:41:54 +000044 const void* patt, SizeT szbPatt, UWord nPatt, UWord ixPatt,
45 const void* input, SizeT szbInput, UWord nInput, UWord ixInput,
46 Bool (*pIsStar)(const void*),
47 Bool (*pIsQuery)(const void*),
48 Bool (*pattEQinp)(const void*,const void*,void*,UWord),
philippea0a73932014-06-15 15:42:20 +000049 void* inputCompleter, Bool (*haveInputInpC)(void*,UWord)
sewardjd7a02db2008-12-12 08:07:49 +000050 )
51{
52 /* This is the spec, written in my favourite formal specification
53 language. It specifies non-greedy matching of '*'s.
54
55 ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
56 ma ('*':ps) [] = ma ps []
57
58 ma ('?':ps) (i:is) = ma ps is
59 ma ('?':ps) [] = False
60
61 ma (p:ps) (i:is) = p == i && ma ps is
62
63 ma (p:ps) [] = False
64 ma [] (i:is) = False -- m-all, True for m-prefix
65 ma [] [] = True
66 */
67 Bool havePatt, haveInput;
florian3e798632012-11-24 19:41:54 +000068 const HChar *currPatt, *currInput;
sewardjd7a02db2008-12-12 08:07:49 +000069 tailcall:
philippea0a73932014-06-15 15:42:20 +000070 vg_assert(nPatt >= 0 && nPatt < 1000000); /* arbitrary */
71 vg_assert(inputCompleter
72 || (nInput >= 0 && nInput < 1000000)); /* arbitrary */
sewardjd7a02db2008-12-12 08:07:49 +000073 vg_assert(ixPatt >= 0 && ixPatt <= nPatt);
philippea0a73932014-06-15 15:42:20 +000074 vg_assert(ixInput >= 0 && (inputCompleter || ixInput <= nInput));
sewardjd7a02db2008-12-12 08:07:49 +000075
76 havePatt = ixPatt < nPatt;
philippea0a73932014-06-15 15:42:20 +000077 haveInput = inputCompleter ?
78 (*haveInputInpC)(inputCompleter, ixInput)
79 : ixInput < nInput;
sewardjd7a02db2008-12-12 08:07:49 +000080
81 /* No specific need to set NULL when !have{Patt,Input}, but guards
82 against inadvertantly dereferencing an out of range pointer to
83 the pattern or input arrays. */
florian3e798632012-11-24 19:41:54 +000084 currPatt = havePatt ? ((const HChar*)patt) + szbPatt * ixPatt : NULL;
philippea0a73932014-06-15 15:42:20 +000085 currInput = haveInput && !inputCompleter ?
86 ((const HChar*)input) + szbInput * ixInput : NULL;
sewardjd7a02db2008-12-12 08:07:49 +000087
88 // Deal with the complex case first: wildcards. Do frugal
89 // matching. When encountering a '*', first skip no characters
90 // at all, and see if the rest of the match still works. Only if
91 // that fails do we then skip a character, and retry at the next
92 // position.
93 //
94 // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
95 //
96 // If we're out of input, check the rest of the pattern matches
97 // the empty input. This really means can only be be empty or
98 // composed entirely of '*'s.
99 //
100 // ma ('*':ps) [] = ma ps []
101 //
102 if (havePatt && pIsStar(currPatt)) {
103 if (haveInput) {
104 // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
105 // we unavoidably have to make a real recursive call for the
106 // first half of the OR, since this isn't straight tail-recursion.
107 if (VG_(generic_match)( matchAll,
108 patt, szbPatt, nPatt, ixPatt+1,
109 input,szbInput,nInput, ixInput+0,
philippe13a59522012-08-03 23:11:39 +0000110 pIsStar,pIsQuery,pattEQinp,
philippea0a73932014-06-15 15:42:20 +0000111 inputCompleter,haveInputInpC) ) {
sewardjd7a02db2008-12-12 08:07:49 +0000112 return True;
113 }
114 // but we can tail-recurse for the second call
115 ixInput++; goto tailcall;
116 } else {
117 // ma ('*':ps) [] = ma ps []
118 ixPatt++; goto tailcall;
119 }
120 }
121
122 // simpler cases now. Deal with '?' wildcards.
123 //
124 // ma ('?':ps) (i:is) = ma ps is
125 // ma ('?':ps) [] = False
126 if (havePatt && pIsQuery(currPatt)) {
127 if (haveInput) {
128 ixPatt++; ixInput++; goto tailcall;
129 } else {
130 return False;
131 }
132 }
133
134 // obvious case with literal chars in the pattern
135 //
136 // ma (p:ps) (i:is) = p == i && ma ps is
137 if (havePatt && haveInput) {
philippe13a59522012-08-03 23:11:39 +0000138 if (!pattEQinp(currPatt,currInput,inputCompleter,ixInput)) return False;
sewardjd7a02db2008-12-12 08:07:49 +0000139 ixPatt++; ixInput++; goto tailcall;
140 }
141
142 // if we run out of input before we run out of pattern, we must fail
143 // ma (p:ps) [] = False
144 if (havePatt && !haveInput) return False;
145
146 // if we run out of pattern before we run out of input, the
147 // verdict depends on the matching mode. If we are trying to
148 // match exactly (the pattern must consume the entire input)
149 // then the outcome is failure. However, if we're merely attempting
150 // to match some prefix of the input, then we have been successful.
151 //
152 // ma [] (i:is) = False -- m-all, True for m-prefix
153 if (!havePatt && haveInput) {
154 return matchAll ? False // match-all
155 : True; // match-prefix
156 }
157
158 // finally, if both sequence and input are both completely
159 // consumed, then we were successful, regardless of matching mode.
160 if (!havePatt && !haveInput) return True;
161
162 // end of cases
163 vg_assert(0);
164}
165
166
167/* And a parameterization of the above, to make it do
168 string matching.
169*/
florian3e798632012-11-24 19:41:54 +0000170static Bool charIsStar ( const void* pV ) { return *(const HChar*)pV == '*'; }
171static Bool charIsQuery ( const void* pV ) { return *(const HChar*)pV == '?'; }
172static Bool char_p_EQ_i ( const void* pV, const void* cV,
philippe13a59522012-08-03 23:11:39 +0000173 void* null_completer, UWord ixcV ) {
florian3e798632012-11-24 19:41:54 +0000174 HChar p = *(const HChar*)pV;
175 HChar c = *(const HChar*)cV;
sewardjd7a02db2008-12-12 08:07:49 +0000176 vg_assert(p != '*' && p != '?');
177 return p == c;
178}
florian19f91bb2012-11-10 22:29:54 +0000179Bool VG_(string_match) ( const HChar* patt, const HChar* input )
sewardjd7a02db2008-12-12 08:07:49 +0000180{
181 return VG_(generic_match)(
182 True/* match-all */,
florian3e798632012-11-24 19:41:54 +0000183 patt, sizeof(HChar), VG_(strlen)(patt), 0,
184 input, sizeof(HChar), VG_(strlen)(input), 0,
philippe13a59522012-08-03 23:11:39 +0000185 charIsStar, charIsQuery, char_p_EQ_i,
philippea0a73932014-06-15 15:42:20 +0000186 NULL, NULL
sewardjd7a02db2008-12-12 08:07:49 +0000187 );
188}
189
190
191// test cases for the matcher (in match-all mode)
192// typedef struct { char* patt; char* input; Bool xres; } Test;
193//
194//static Test tests[] =
195// {
196// { "" ,"" , True },
197// { "a" ,"" , False },
198// { "a" ,"b" , False },
199// { "a" ,"a" , True },
200// { "a" ,"aa" , False },
201// { "*" ,"" , True },
202// { "**" ,"" , True },
203// { "*" ,"abc", True },
204// { "*a" ,"abc", False },
205// { "*b" ,"abc", False },
206// { "*bc" ,"abc", True },
207// { "a*b" ,"abc", False },
208// { "a*c" ,"abc", True },
209// { "*c" ,"abc", True },
210// { "c*c" ,"abc", False },
211// { "abc*" ,"abc", True },
212// { "abc**" ,"abc", True },
213// { "**abc" ,"abc", True },
214// { "**a*b*c**" ,"abc", True },
215// { "**a*b*d**" ,"abc", False },
216// { "a?b" ,"abc", False },
217// { "a?c" ,"abc", True },
218// { "?" ,"" , False },
219// { "?" ,"a" , True },
220// { "?" ,"ab" , False },
221// { "abcd" ,"abc", False },
222// { "ab" ,"abc", False },
223// { NULL ,NULL , False }
224// };
225//
226//int main ( void )
227//{
228// Test* t;
229// for (t = tests; t->patt; t++) {
florian866862a2014-12-13 18:35:00 +0000230// printf("%-10s %-6s %s\n",
sewardjd7a02db2008-12-12 08:07:49 +0000231// t->patt, t->input,
232// match_string_all((UChar*)t->patt,(UChar*)t->input,True)
233// == t->xres
234// ? "pass" : "FAIL" );
235// }
236// return 0;
237//}
238
239/*--------------------------------------------------------------------*/
240/*--- end m_seqmatch.c ---*/
241/*--------------------------------------------------------------------*/