blob: 423b9805918569964173a9f4309878ccdf747eda [file] [log] [blame]
/**
 * @file op_regex.h
 * This file contains various definitions and the interface for a
 * lightweight wrapper around libc regex, providing match
 * and replace facilities.
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 * @remark Idea comes from TextFilt project <http://textfilt.sourceforge.net>
 *
 * @author Philippe Elie
 */
13
14#ifndef OP_REGEX_H
15#define OP_REGEX_H
16
17// required by posix before including regex.h
18#include <sys/types.h>
19#include <regex.h>
20
21#include <string>
22#include <vector>
23#include <map>
24
25#include "op_exception.h"
26
27/**
28 * ill formed regular expression or expression throw such exception
29 */
30struct bad_regex : op_exception {
31 bad_regex(std::string const & pattern);
32};
33
/**
 * lightweight encapsulation of regex lib search and replace
 *
 * See stl.pat for further details and examples of used syntax.
 *
 * Objects of this class cannot be copied: they hold compiled regex_t
 * objects and declare a destructor, so an implicitly generated copy would
 * duplicate the regex_t handles and leave two objects responsible for the
 * same compiled expressions.
 */
class regular_expression_replace {
public:
	/**
	 * @param limit limit on number of search and replace done
	 * @param limit_defs_expansion limit on number of expansions done
	 * during replacement of regular definition names by their expansion
	 *
	 * Build an object holding regular definitions and regular expression
	 * & replace pairs, preparing it for substitution ala sed.
	 */
	regular_expression_replace(size_t limit = 100,
				   size_t limit_defs_expansion = 100);
	~regular_expression_replace();

	/**
	 * @param name a regular definition name
	 * @param replace the string to substitute in other regular
	 * definitions or regular expressions when this regular definition
	 * name is encountered.
	 */
	void add_definition(std::string const & name,
			    std::string const & replace);
	/**
	 * @param pattern a regular expression pattern, POSIX extended notation
	 * @param replace the replace string to use when this regular
	 * expression is matched
	 *
	 * You can embed regular definitions in pattern but not in replace.
	 */
	void add_pattern(std::string const & pattern,
			 std::string const & replace);

	/**
	 * @param str the input/output string where we search for patterns
	 * and replace them.
	 *
	 * Execute the loop at most limit times on the set of regular
	 * expressions.
	 *
	 * Return true if too many matches occur and replacing has been
	 * stopped due to reaching limit_defs_expansion. You can test if some
	 * pattern has been matched by saving the input string and comparing
	 * it to the new value. There is no way to detect s/a/a because the
	 * output string will be identical to the input string.
	 *
	 * NOTE(review): the constructor documents `limit`, not
	 * `limit_defs_expansion`, as the bound on search and replace; confirm
	 * against the implementation which limit, and which return value,
	 * applies here.
	 */
	bool execute(std::string & str) const;
private:
	// Copying is disabled: declared private and intentionally left
	// unimplemented (the file predates "= delete").
	regular_expression_replace(regular_expression_replace const &);
	regular_expression_replace & operator=(regular_expression_replace const &);

	struct replace_t {
		// when this regexp is matched
		regex_t regexp;
		// replace the matched part with this string
		std::string replace;
	};

	// helpers for execute()
	bool do_execute(std::string & str, replace_t const & regexp) const;
	void do_replace(std::string & str, std::string const & replace,
			regmatch_t const * match) const;

	// helper for add_definition() and add_pattern()
	std::string expand_string(std::string const & input);

	// helper for add_pattern()
	std::string substitute_definition(std::string const & pattern);

	// return the match or throw if idx is invalid
	regmatch_t const & get_match(regmatch_t const * match, char idx) const;

	// Don't increase this too much, it has a direct impact on
	// performance. This limits the number of grouping expressions allowed
	// in a regular expression. Note that you can use grouping match
	// operators > 9 only in the replace rule, not in the match regular
	// expression, since POSIX doesn't allow more than \9 in a matching
	// sequence.
	static const size_t max_match = 16;

	size_t limit;
	size_t limit_defs_expansion;
	std::vector<replace_t> regex_replace;
	/// dictionary of regular definitions
	typedef std::map<std::string, std::string> defs_dict;
	defs_dict defs;
};
120
121/**
122 * @param regex the regular_expression_replace to fill
123 * @param filename the filename from where the deifnition and pattern are read
124 *
125 * add to regex pattern and regular definition read from the given file
126 */
127void setup_regex(regular_expression_replace& regex,
128 std::string const & filename);
129
130#endif /* !OP_REGEX_H */