/**
 * @file op_regex.cpp
 * This file contains the implementation of a lightweight wrapper around
 * libc regex, providing regular expression match and replace facilities.
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 * @remark Idea comes from TextFilt project <http://textfilt.sourceforge.net>
 *
 * @author Philippe Elie
 */
12
13#include <cerrno>
14
15#include <iostream>
16#include <fstream>
17
18#include "string_manip.h"
19
20#include "op_regex.h"
21
22using namespace std;
23
24namespace {
25
/**
 * Translate an error code coming from the libc regex functions into a
 * readable message.
 *
 * @param err  error code to translate
 * @param regexp  regex object handed to regerror() together with the code
 * @return the message text produced by regerror()
 *
 * The message is written into storage owned by a string, so nothing is
 * left allocated once the function returns.
 */
string op_regerror(int err, regex_t const & regexp)
{
	// with a null buffer regerror() only reports the size it needs,
	// terminating nul included
	size_t const needed_size = regerror(err, &regexp, 0, 0);
	if (needed_size == 0)
		return string();

	string buffer(needed_size, '\0');
	regerror(err, &regexp, &buffer[0], needed_size);

	// stop at the terminating nul written by regerror()
	return string(buffer.c_str());
}
34
35
36void op_regcomp(regex_t & regexp, string const & pattern)
37{
38 int err = regcomp(&regexp, pattern.c_str(), REG_EXTENDED);
39 if (err) {
40 throw bad_regex("regcomp error: " + op_regerror(err, regexp)
41 + " for pattern : " + pattern);
42 }
43}
44
45
/**
 * Run a compiled regular expression against a string.
 *
 * @param regex  compiled expression
 * @param str  string to search
 * @param match  array receiving the offsets of the match and its groups
 * @param nmatch  number of entries available in @p match
 * @return true only when regexec() reports a successful match
 *
 * Any non-zero result from regexec() is treated as "no match", not only
 * REG_NOMATCH: after another failure the content of @p match must not be
 * used by the caller.
 */
bool op_regexec(regex_t const & regex, string const & str, regmatch_t * match,
	size_t nmatch)
{
	return regexec(&regex, str.c_str(), nmatch, match, 0) == 0;
}
51
52
/**
 * Hand a compiled regular expression back to regfree().
 *
 * @param regexp  expression previously filled in by regcomp()
 */
void op_regfree(regex_t & regexp)
{
	regex_t * const compiled = &regexp;
	regfree(compiled);
}
57
58
// Return the index number associated with a char seen in a "\x".
// The allowed ranges for x are [0-9] (giving 0..9) and [a-z] (giving
// 10..35); size_t(-1) is returned if x is in neither range.
size_t subexpr_index(char ch)
{
	// explicit range test: passing a negative char to isdigit() is
	// undefined behavior
	if (ch >= '0' && ch <= '9')
		return static_cast<size_t>(ch - '0');
	if (ch >= 'a' && ch <= 'z')
		return static_cast<size_t>(ch - 'a' + 10);
	return size_t(-1);
}
70
71} // anonymous namespace
72
73
74bad_regex::bad_regex(string const & pattern)
75 : op_exception(pattern)
76{
77}
78
79
80regular_expression_replace::regular_expression_replace(size_t limit_,
81 size_t limit_defs)
82 :
83 limit(limit_),
84 limit_defs_expansion(limit_defs)
85{
86}
87
88
89regular_expression_replace::~regular_expression_replace()
90{
91 for (size_t i = 0 ; i < regex_replace.size() ; ++i)
92 op_regfree(regex_replace[i].regexp);
93}
94
95
96void regular_expression_replace::add_definition(string const & name,
97 string const & definition)
98{
99 defs[name] = expand_string(definition);
100}
101
102
103void regular_expression_replace::add_pattern(string const & pattern,
104 string const & replace)
105{
106 string expanded_pattern = expand_string(pattern);
107
108 regex_t regexp;
109 op_regcomp(regexp, expanded_pattern);
110 replace_t regex = { regexp, replace };
111 regex_replace.push_back(regex);
112}
113
114
115string regular_expression_replace::expand_string(string const & input)
116{
117 string last, expanded(input);
118 size_t i = 0;
119 for (i = 0 ; i < limit_defs_expansion ; ++i) {
120 last = expanded;
121 expanded = substitute_definition(last);
122 if (expanded == last)
123 break;
124 }
125
126 if (i == limit_defs_expansion)
127 throw bad_regex("too many substitution for: + input");
128
129 return last;
130}
131
132
133string regular_expression_replace::substitute_definition(string const & pattern)
134{
135 string result;
136 bool previous_is_escape = false;
137
138 for (size_t i = 0 ; i < pattern.length() ; ++i) {
139 if (pattern[i] == '$' && !previous_is_escape) {
140 size_t pos = pattern.find('{', i);
141 if (pos != i + 1) {
142 throw bad_regex("invalid $ in pattern: " + pattern);
143 }
144 size_t end = pattern.find('}', i);
145 if (end == string::npos) {
146 throw bad_regex("no matching '}' in pattern: " + pattern);
147 }
148 string def_name = pattern.substr(pos+1, (end-pos) - 1);
149 if (defs.find(def_name) == defs.end()) {
150 throw bad_regex("definition not found and used in pattern: (" + def_name + ") " + pattern);
151 }
152 result += defs[def_name];
153 i = end;
154 } else {
155 if (pattern[i] == '\\' && !previous_is_escape) {
156 previous_is_escape = true;
157 } else {
158 previous_is_escape = false;
159 }
160 result += pattern[i];
161 }
162 }
163
164 return result;
165}
166
167
168// FIXME limit output string size ? (cause we can have exponential growing
169// of output string through a rule "a" = "aa")
170bool regular_expression_replace::execute(string & str) const
171{
172 bool changed = true;
173 for (size_t nr_iter = 0; changed && nr_iter < limit ; ++nr_iter) {
174 changed = false;
175 for (size_t i = 0 ; i < regex_replace.size() ; ++i) {
176 if (do_execute(str, regex_replace[i])) {
177 changed = true;
178 }
179 }
180 }
181
182 // this don't return if the input string has been changed but if
183 // we reach the limit number of iteration.
184 return changed == false;
185}
186
187
188bool regular_expression_replace::do_execute(string & str,
189 replace_t const & regexp) const
190{
191 bool changed = false;
192
193 regmatch_t match[max_match];
194 for (size_t iter = 0;
195 op_regexec(regexp.regexp, str, match, max_match) && iter < limit;
196 iter++) {
197 changed = true;
198 do_replace(str, regexp.replace, match);
199 }
200
201 return changed;
202}
203
204
205regmatch_t const &
206regular_expression_replace::get_match(regmatch_t const * match, char idx) const
207{
208 size_t sub_expr = subexpr_index(idx);
209 if (sub_expr == size_t(-1))
210 throw bad_regex("expect group index: " + idx);
211 if (sub_expr >= max_match)
212 throw bad_regex("illegal group index :" + idx);
213 return match[sub_expr];
214}
215
216void regular_expression_replace::do_replace
217(string & str, string const & replace, regmatch_t const * match) const
218{
219 string inserted;
220 for (size_t i = 0 ; i < replace.length() ; ++i) {
221 if (replace[i] == '\\') {
222 if (i == replace.length() - 1) {
223 throw bad_regex("illegal \\ trailer: " +
224 replace);
225 }
226 ++i;
227 if (replace[i] == '\\') {
228 inserted += '\\';
229 } else {
230 regmatch_t const & matched = get_match(match,
231 replace[i]);
232 if (matched.rm_so == -1 &&
233 matched.rm_eo == -1) {
234 // empty match: nothing todo
235 } else if (matched.rm_so == -1 ||
236 matched.rm_eo == -1) {
237 throw bad_regex("illegal match: " +
238 replace);
239 } else {
240 inserted += str.substr(matched.rm_so,
241 matched.rm_eo - matched.rm_so);
242 }
243 }
244 } else {
245 inserted += replace[i];
246 }
247 }
248
249 size_t first = match[0].rm_so;
250 size_t count = match[0].rm_eo - match[0].rm_so;
251
252 str.replace(first, count, inserted);
253}
254
255
256void setup_regex(regular_expression_replace & regex,
257 string const & filename)
258{
259 ifstream in(filename.c_str());
260 if (!in) {
261 throw op_runtime_error("Can't open file " + filename +
262 " for reading", errno);
263 }
264
265 regular_expression_replace var_name_rule;
266 var_name_rule.add_pattern("^\\$([_a-zA-Z][_a-zA-Z0-9]*)[ ]*=.*", "\\1");
267 regular_expression_replace var_value_rule;
268 var_value_rule.add_pattern(".*=[ ]*\"(.*)\"", "\\1");
269
270 regular_expression_replace left_rule;
271 left_rule.add_pattern("[ ]*\"(.*)\"[ ]*=.*", "\\1");
272 regular_expression_replace right_rule;
273 right_rule.add_pattern(".*=[ ]*\"(.*)\"", "\\1");
274
275 string line;
276 while (getline(in, line)) {
277 line = trim(line);
278 if (line.empty() || line[0] == '#')
279 continue;
280
281 string temp = line;
282 var_name_rule.execute(temp);
283 if (temp == line) {
284 string left = line;
285 left_rule.execute(left);
286 if (left == line) {
287 throw bad_regex("invalid input file: " +
288 '"' + line + '"');
289 }
290
291 string right = line;
292 right_rule.execute(right);
293 if (right == line) {
294 throw bad_regex("invalid input file: "
295 + '"' + line + '"');
296 }
297
298 regex.add_pattern(left, right);
299 } else {
300 // temp != line ==> var_name_rule succeed to substitute
301 // into temp the var_name present in line
302 string var_name = temp;
303 string var_value = line;
304 var_value_rule.execute(var_value);
305 if (var_value == line) {
306 throw bad_regex("invalid input file: " +
307 '"' + line + '"');
308 }
309
310 regex.add_definition(var_name, var_value);
311 }
312 }
313}