blob: 64a58ed7e646234ab5135bef3363c7f097b9357f [file] [log] [blame]
// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Author: George Yakovlev
//         Philippe Liard
17
philip.liard@gmail.com384682a2011-07-12 15:41:29 +000018#include "phonenumbers/regexp_adapter_re2.h"
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +000019
20#include <cstddef>
21#include <string>
22
23#include <re2/re2.h>
24#include <re2/stringpiece.h>
25
philip.liard@gmail.comaf4a2ce2013-04-30 11:35:55 +000026#include "phonenumbers/base/basictypes.h"
27#include "phonenumbers/base/logging.h"
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +000028#include "phonenumbers/stringutil.h"
29
30namespace i18n {
31namespace phonenumbers {
32
33using re2::StringPiece;
34
35// Implementation of RegExpInput abstract class.
36class RE2RegExpInput : public RegExpInput {
37 public:
38 explicit RE2RegExpInput(const string& utf8_input)
39 : string_(utf8_input),
40 utf8_input_(string_) {}
41
42 virtual string ToString() const {
43 return utf8_input_.ToString();
44 }
45
46 StringPiece* Data() {
47 return &utf8_input_;
48 }
49
50 private:
51 // string_ holds the string referenced by utf8_input_ as StringPiece doesn't
52 // copy the string passed in.
53 const string string_;
54 StringPiece utf8_input_;
55};
56
57namespace {
58
59template <typename Function, typename Input>
60bool DispatchRE2Call(Function regex_function,
61 Input input,
62 const RE2& regexp,
63 string* out1,
64 string* out2,
65 string* out3) {
66 if (out3) {
67 return regex_function(input, regexp, out1, out2, out3);
68 }
69 if (out2) {
70 return regex_function(input, regexp, out1, out2);
71 }
72 if (out1) {
73 return regex_function(input, regexp, out1);
74 }
75 return regex_function(input, regexp);
76}
77
// Converts a replacement string that uses dollar-sign group references into
// the backslash form used by RE2 rewrite strings:
//   - an unescaped '$' becomes '\' (so "$1" turns into "\1");
//   - "\$" (an escaped dollar-sign) becomes a plain '$';
//   - any other backslash escape, including "\\", is copied through unchanged.
//
// The conversion is done in a single left-to-right scan so that each backslash
// is classified by what it escapes in the *input*. Replacing every '$' first
// and then collapsing backslash pairs cannot tell a pair it just produced from
// one that was already present, and would turn e.g. "\\$1" (literal backslash
// followed by group 1) into "$\1".
//
// Returns the converted string; the argument is not modified.
std::string TransformRegularExpressionToRE2Syntax(const std::string& regex) {
  std::string re2_regex;
  re2_regex.reserve(regex.size());
  const std::size_t length = regex.size();
  for (std::size_t i = 0; i < length; ++i) {
    const char current = regex[i];
    if (current == '$') {
      // Unescaped dollar-sign: introduces a group reference.
      re2_regex += '\\';
    } else if (current == '\\' && i + 1 < length) {
      // Consume the escaped character together with its backslash so that it
      // is never re-examined as an escape or a group reference on its own.
      const char escaped = regex[++i];
      if (escaped == '$') {
        re2_regex += '$';
      } else {
        re2_regex += '\\';
        re2_regex += escaped;
      }
    } else {
      // Ordinary character, or a lone trailing backslash: copy verbatim.
      re2_regex += current;
    }
  }
  return re2_regex;
}
92
93} // namespace
94
95// Implementation of RegExp abstract class.
96class RE2RegExp : public RegExp {
97 public:
98 explicit RE2RegExp(const string& utf8_regexp)
99 : utf8_regexp_(utf8_regexp) {}
100
101 virtual bool Consume(RegExpInput* input_string,
102 bool anchor_at_start,
103 string* matched_string1,
104 string* matched_string2,
105 string* matched_string3) const {
106 DCHECK(input_string);
107 StringPiece* utf8_input =
108 static_cast<RE2RegExpInput*>(input_string)->Data();
109
110 if (anchor_at_start) {
111 return DispatchRE2Call(RE2::Consume, utf8_input, utf8_regexp_,
112 matched_string1, matched_string2,
113 matched_string3);
114 } else {
115 return DispatchRE2Call(RE2::FindAndConsume, utf8_input, utf8_regexp_,
116 matched_string1, matched_string2,
117 matched_string3);
118 }
119 }
120
121 virtual bool Match(const string& input_string,
122 bool full_match,
123 string* matched_string) const {
124 if (full_match) {
125 return DispatchRE2Call(RE2::FullMatch, input_string, utf8_regexp_,
126 matched_string, NULL, NULL);
127 } else {
128 return DispatchRE2Call(RE2::PartialMatch, input_string, utf8_regexp_,
129 matched_string, NULL, NULL);
130 }
131 }
132
133 virtual bool Replace(string* string_to_process,
134 bool global,
135 const string& replacement_string) const {
136 DCHECK(string_to_process);
137 const string re2_replacement_string =
138 TransformRegularExpressionToRE2Syntax(replacement_string);
139 if (global) {
140 return RE2::GlobalReplace(string_to_process, utf8_regexp_,
141 re2_replacement_string);
142 } else {
143 return RE2::Replace(string_to_process, utf8_regexp_,
144 re2_replacement_string);
145 }
146 }
147
148 private:
149 RE2 utf8_regexp_;
150};
151
philip.liard@gmail.com384682a2011-07-12 15:41:29 +0000152RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const {
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000153 return new RE2RegExpInput(utf8_input);
154}
155
philip.liard@gmail.com384682a2011-07-12 15:41:29 +0000156RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const {
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000157 return new RE2RegExp(utf8_regexp);
158}
159
160} // namespace phonenumbers
161} // namespace i18n