Mike Dodd | 8cfa702 | 2010-11-17 11:12:26 -0800 | [diff] [blame] | 1 | /** |
| 2 | * @file op_regex.h |
| 3 | * This file contains various definitions and interface for a |
| 4 | * lightweight wrapper around libc regex, providing match |
| 5 | * and replace facility. |
| 6 | * |
| 7 | * @remark Copyright 2003 OProfile authors |
| 8 | * @remark Read the file COPYING |
| 9 | * @remark Idea comes from TextFilt project <http://textfilt.sourceforge.net> |
| 10 | * |
| 11 | * @author Philippe Elie |
| 12 | */ |
| 13 | |
| 14 | #ifndef OP_REGEX_H |
| 15 | #define OP_REGEX_H |
| 16 | |
| 17 | // required by posix before including regex.h |
| 18 | #include <sys/types.h> |
| 19 | #include <regex.h> |
| 20 | |
| 21 | #include <string> |
| 22 | #include <vector> |
| 23 | #include <map> |
| 24 | |
| 25 | #include "op_exception.h" |
| 26 | |
| 27 | /** |
| 28 | * ill formed regular expression or expression throw such exception |
| 29 | */ |
| 30 | struct bad_regex : op_exception { |
| 31 | bad_regex(std::string const & pattern); |
| 32 | }; |
| 33 | |
/**
 * Lightweight encapsulation of the regex library search and replace.
 *
 * An instance holds a dictionary of regular definitions together with a
 * list of (regular expression, replacement) rules and applies them to a
 * string, a la sed.
 *
 * Instances are not copyable, see the private section.
 *
 * See stl.pat for further details and examples of the syntax used.
 */
class regular_expression_replace {
public:
	/**
	 * @param limit  limit on the number of search and replace done
	 * @param limit_defs_expansion  limit on the number of expansions done
	 * while replacing regular definition names by their expansion
	 *
	 * Build an object holding regular definitions and regular expression
	 * & replace pairs, preparing it for substitution a la sed.
	 */
	regular_expression_replace(size_t limit = 100,
				   size_t limit_defs_expansion = 100);
	~regular_expression_replace();

	/**
	 * @param name  a regular definition name
	 * @param replace  the string to substitute in other regular
	 * definitions or regular expressions when this regular definition
	 * name is encountered
	 */
	void add_definition(std::string const & name,
			    std::string const & replace);

	/**
	 * @param pattern  a regular expression pattern, POSIX extended notation
	 * @param replace  the replace string to use when this regular
	 * expression is matched
	 *
	 * Regular definitions can be embedded in pattern but not in replace.
	 */
	void add_pattern(std::string const & pattern,
			 std::string const & replace);

	/**
	 * @param str  the input/output string in which patterns are searched
	 * and replaced
	 *
	 * Execute the loop at most limit times over the set of regular
	 * expressions.
	 *
	 * Return true if too many matches occurred and replacing has been
	 * stopped because a limit was reached.
	 * NOTE(review): the historical comment names limit_defs_expansion
	 * here, although the constructor documents limit as the bound on
	 * search and replace -- confirm against the implementation.
	 *
	 * To test whether some pattern has been matched, save the input
	 * string and compare it to the new value. There is no way to detect
	 * s/a/a because the output string is identical to the input string.
	 */
	bool execute(std::string & str) const;

private:
	// Copying is disabled: the class declares its own destructor and
	// stores compiled regex_t objects by value, so a member-wise copy
	// would presumably leave two objects releasing the same compiled
	// expressions. Both members are declared but intentionally never
	// defined (C++03 non-copyable idiom).
	regular_expression_replace(regular_expression_replace const &);
	regular_expression_replace & operator=(regular_expression_replace const &);

	/// one search and replace rule
	struct replace_t {
		// when this regexp is matched
		regex_t regexp;
		// replace the matched part with this string
		std::string replace;
	};

	// helpers for execute()
	bool do_execute(std::string & str, replace_t const & regexp) const;
	void do_replace(std::string & str, std::string const & replace,
			regmatch_t const * match) const;

	// helper for add_definition() and add_pattern()
	std::string expand_string(std::string const & input);

	// helper for add_pattern()
	std::string substitute_definition(std::string const & pattern);

	// return the match, or throw if idx is invalid
	regmatch_t const & get_match(regmatch_t const * match, char idx) const;

	// Do not increase this needlessly, it has a direct impact on
	// performance. It limits the number of grouping expressions allowed
	// in a regular expression. Note that grouping match operators > 9
	// can only be used in the replace rule, not in the match regular
	// expression, since POSIX does not allow more than \9 in a matching
	// sequence.
	static const size_t max_match = 16;

	// see the constructor parameters of the same name
	size_t limit;
	size_t limit_defs_expansion;
	// the (regular expression, replacement) rules
	std::vector<replace_t> regex_replace;
	/// dictionary of regular definitions
	typedef std::map<std::string, std::string> defs_dict;
	defs_dict defs;
};
| 120 | |
| 121 | /** |
| 122 | * @param regex the regular_expression_replace to fill |
| 123 | * @param filename the filename from where the deifnition and pattern are read |
| 124 | * |
| 125 | * add to regex pattern and regular definition read from the given file |
| 126 | */ |
| 127 | void setup_regex(regular_expression_replace& regex, |
| 128 | std::string const & filename); |
| 129 | |
| 130 | #endif /* !OP_REGEX_H */ |