blob: 60aa2b2d3e3d03978a427b413ad640cf0d7ddaeb [file] [log] [blame]
Bjorn Bringertfb903a42013-03-18 21:17:26 +00001#include "pseudolocalize.h"
2
3using namespace std;
4
Anton Krumina2ef5c02014-03-12 14:46:44 -07005// String basis to generate expansion
6static const String16 k_expansion_string = String16("one two three "
7 "four five six seven eight nine ten eleven twelve thirteen "
8 "fourteen fiveteen sixteen seventeen nineteen twenty");
9
10// Special unicode characters to override directionality of the words
11static const String16 k_rlm = String16("\xe2\x80\x8f");
12static const String16 k_rlo = String16("\xE2\x80\xae");
13static const String16 k_pdf = String16("\xE2\x80\xac");
14
15// Placeholder marks
16static const String16 k_placeholder_open = String16("\xc2\xbb");
17static const String16 k_placeholder_close = String16("\xc2\xab");
18
/**
 * Looks up the replacement for a single character.
 *
 * Returns a NUL-terminated UTF-8 byte string for the ASCII letters and the
 * punctuation marks listed in the table below, or NULL when the character has
 * no replacement (note that upper-case 'F' has no entry).
 */
static const char*
pseudolocalize_char(const char16_t c)
{
    struct Replacement {
        char16_t plain;
        const char* accented;
    };
    static const Replacement kReplacements[] = {
        // Lower-case letters
        { 'a', "\xc3\xa5" },
        { 'b', "\xc9\x93" },
        { 'c', "\xc3\xa7" },
        { 'd', "\xc3\xb0" },
        { 'e', "\xc3\xa9" },
        { 'f', "\xc6\x92" },
        { 'g', "\xc4\x9d" },
        { 'h', "\xc4\xa5" },
        { 'i', "\xc3\xae" },
        { 'j', "\xc4\xb5" },
        { 'k', "\xc4\xb7" },
        { 'l', "\xc4\xbc" },
        { 'm', "\xe1\xb8\xbf" },
        { 'n', "\xc3\xb1" },
        { 'o', "\xc3\xb6" },
        { 'p', "\xc3\xbe" },
        { 'q', "\x51" },
        { 'r', "\xc5\x95" },
        { 's', "\xc5\xa1" },
        { 't', "\xc5\xa3" },
        { 'u', "\xc3\xbb" },
        { 'v', "\x56" },
        { 'w', "\xc5\xb5" },
        { 'x', "\xd1\x85" },
        { 'y', "\xc3\xbd" },
        { 'z', "\xc5\xbe" },
        // Upper-case letters
        { 'A', "\xc3\x85" },
        { 'B', "\xce\xb2" },
        { 'C', "\xc3\x87" },
        { 'D', "\xc3\x90" },
        { 'E', "\xc3\x89" },
        { 'G', "\xc4\x9c" },
        { 'H', "\xc4\xa4" },
        { 'I', "\xc3\x8e" },
        { 'J', "\xc4\xb4" },
        { 'K', "\xc4\xb6" },
        { 'L', "\xc4\xbb" },
        { 'M', "\xe1\xb8\xbe" },
        { 'N', "\xc3\x91" },
        { 'O', "\xc3\x96" },
        { 'P', "\xc3\x9e" },
        { 'Q', "\x71" },
        { 'R', "\xc5\x94" },
        { 'S', "\xc5\xa0" },
        { 'T', "\xc5\xa2" },
        { 'U', "\xc3\x9b" },
        { 'V', "\xce\xbd" },
        { 'W', "\xc5\xb4" },
        { 'X', "\xc3\x97" },
        { 'Y', "\xc3\x9d" },
        { 'Z', "\xc5\xbd" },
        // Punctuation
        { '!', "\xc2\xa1" },
        { '?', "\xc2\xbf" },
        { '$', "\xe2\x82\xac" },
    };

    const size_t count = sizeof(kReplacements) / sizeof(kReplacements[0]);
    for (size_t k = 0; k < count; ++k) {
        if (kReplacements[k].plain == c) {
            return kReplacements[k].accented;
        }
    }
    return NULL;
}
80
/**
 * Returns true when `c` is one of the characters that ends a '%' placeholder
 * in pseudolocalize_string(): the letters listed below or a second '%'.
 * The two-character 't' conversions are handled by the caller, not here.
 */
static bool
is_possible_normal_placeholder_end(const char16_t c) {
    static const char kPlaceholderEnds[] = "sScCdoxXfeEgGaAbBhH%n";
    // Stop at the terminator so that c == 0 is never reported as a match.
    for (const char* p = kPlaceholderEnds; *p != '\0'; ++p) {
        if (c == static_cast<char16_t>(*p)) {
            return true;
        }
    }
    return false;
}
108
109String16
110pseudo_generate_expansion(const unsigned int length) {
111 String16 result = k_expansion_string;
112 const char16_t* s = result.string();
113 if (result.size() < length) {
114 result += String16(" ");
115 result += pseudo_generate_expansion(length - result.size());
116 } else {
117 int ext = 0;
118 // Should contain only whole words, so looking for a space
119 for (unsigned int i = length + 1; i < result.size(); ++i) {
120 ++ext;
121 if (s[i] == ' ') {
122 break;
123 }
124 }
125 result.remove(length + ext, 0);
126 }
127 return result;
128}
129
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000130/**
131 * Converts characters so they look like they've been localized.
132 *
133 * Note: This leaves escape sequences untouched so they can later be
134 * processed by ResTable::collectString in the normal way.
135 */
Anton Krumina2ef5c02014-03-12 14:46:44 -0700136String16
137pseudolocalize_string(const String16& source)
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000138{
Anton Krumina2ef5c02014-03-12 14:46:44 -0700139 const char16_t* s = source.string();
140 String16 result;
141 const size_t I = source.size();
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000142 for (size_t i=0; i<I; i++) {
Anton Krumina2ef5c02014-03-12 14:46:44 -0700143 char16_t c = s[i];
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000144 if (c == '\\') {
Anton Krumina2ef5c02014-03-12 14:46:44 -0700145 // Escape syntax, no need to pseudolocalize
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000146 if (i<I-1) {
Anton Krumina2ef5c02014-03-12 14:46:44 -0700147 result += String16("\\");
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000148 i++;
149 c = s[i];
150 switch (c) {
151 case 'u':
152 // this one takes up 5 chars
Anton Krumina2ef5c02014-03-12 14:46:44 -0700153 result += String16(s+i, 5);
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000154 i += 4;
155 break;
156 case 't':
157 case 'n':
158 case '#':
159 case '@':
160 case '?':
161 case '"':
162 case '\'':
163 case '\\':
164 default:
Anton Krumina2ef5c02014-03-12 14:46:44 -0700165 result.append(&c, 1);
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000166 break;
167 }
168 } else {
Anton Krumina2ef5c02014-03-12 14:46:44 -0700169 result.append(&c, 1);
170 }
171 } else if (c == '%') {
172 // Placeholder syntax, no need to pseudolocalize
173 result += k_placeholder_open;
174 bool end = false;
175 result.append(&c, 1);
176 while (!end && i < I) {
177 ++i;
178 c = s[i];
179 result.append(&c, 1);
180 if (is_possible_normal_placeholder_end(c)) {
181 end = true;
182 } else if (c == 't') {
183 ++i;
184 c = s[i];
185 result.append(&c, 1);
186 end = true;
187 }
188 }
189 result += k_placeholder_close;
190 } else if (c == '<' || c == '&') {
191 // html syntax, no need to pseudolocalize
192 bool tag_closed = false;
193 while (!tag_closed && i < I) {
194 if (c == '&') {
195 String16 escape_text;
196 escape_text.append(&c, 1);
197 bool end = false;
198 size_t htmlCodePos = i;
199 while (!end && htmlCodePos < I) {
200 ++htmlCodePos;
201 c = s[htmlCodePos];
202 escape_text.append(&c, 1);
203 // Valid html code
204 if (c == ';') {
205 end = true;
206 i = htmlCodePos;
207 }
208 // Wrong html code
209 else if (!((c == '#' ||
210 (c >= 'a' && c <= 'z') ||
211 (c >= 'A' && c <= 'Z') ||
212 (c >= '0' && c <= '9')))) {
213 end = true;
214 }
215 }
216 result += escape_text;
217 if (escape_text != String16("&lt;")) {
218 tag_closed = true;
219 }
220 continue;
221 }
222 if (c == '>') {
223 tag_closed = true;
224 result.append(&c, 1);
225 continue;
226 }
227 result.append(&c, 1);
228 i++;
229 c = s[i];
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000230 }
231 } else {
Anton Krumina2ef5c02014-03-12 14:46:44 -0700232 // This is a pure text that should be pseudolocalized
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000233 const char* p = pseudolocalize_char(c);
234 if (p != NULL) {
Anton Krumina2ef5c02014-03-12 14:46:44 -0700235 result += String16(p);
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000236 } else {
Anton Krumina2ef5c02014-03-12 14:46:44 -0700237 result.append(&c, 1);
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000238 }
239 }
240 }
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000241 return result;
242}
243
Anton Krumina2ef5c02014-03-12 14:46:44 -0700244String16
245pseudobidi_string(const String16& source)
246{
247 const char16_t* s = source.string();
248 String16 result;
249 result += k_rlm;
250 result += k_rlo;
251 for (size_t i=0; i<source.size(); i++) {
252 char16_t c = s[i];
253 switch(c) {
254 case ' ': result += k_pdf;
255 result += k_rlm;
256 result.append(&c, 1);
257 result += k_rlm;
258 result += k_rlo;
259 break;
260 default: result.append(&c, 1);
261 break;
262 }
263 }
264 result += k_pdf;
265 result += k_rlm;
266 return result;
267}
Bjorn Bringertfb903a42013-03-18 21:17:26 +0000268