Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 1 | #include "pseudolocalize.h" |
| 2 | |
| 3 | using namespace std; |
| 4 | |
Anton Krumin | a2ef5c0 | 2014-03-12 14:46:44 -0700 | [diff] [blame] | 5 | // String basis to generate expansion |
| 6 | static const String16 k_expansion_string = String16("one two three " |
| 7 | "four five six seven eight nine ten eleven twelve thirteen " |
| 8 | "fourteen fiveteen sixteen seventeen nineteen twenty"); |
| 9 | |
| 10 | // Special unicode characters to override directionality of the words |
| 11 | static const String16 k_rlm = String16("\xe2\x80\x8f"); |
| 12 | static const String16 k_rlo = String16("\xE2\x80\xae"); |
| 13 | static const String16 k_pdf = String16("\xE2\x80\xac"); |
| 14 | |
| 15 | // Placeholder marks |
| 16 | static const String16 k_placeholder_open = String16("\xc2\xbb"); |
| 17 | static const String16 k_placeholder_close = String16("\xc2\xab"); |
| 18 | |
/**
 * Returns the pseudolocalized replacement for a single character.
 *
 * @param c the UTF-16 code unit to look up.
 * @return a NUL-terminated UTF-8 string holding the replacement for the ASCII
 *         letters and for '!', '?' and '$'; NULL for every other character,
 *         which callers are expected to copy through unchanged.
 */
static const char*
pseudolocalize_char(const char16_t c)
{
    switch (c) {
        case 'a':   return "\xc3\xa5";
        case 'b':   return "\xc9\x93";
        case 'c':   return "\xc3\xa7";
        case 'd':   return "\xc3\xb0";
        case 'e':   return "\xc3\xa9";
        case 'f':   return "\xc6\x92";
        case 'g':   return "\xc4\x9d";
        case 'h':   return "\xc4\xa5";
        case 'i':   return "\xc3\xae";
        case 'j':   return "\xc4\xb5";
        case 'k':   return "\xc4\xb7";
        case 'l':   return "\xc4\xbc";
        case 'm':   return "\xe1\xb8\xbf";
        case 'n':   return "\xc3\xb1";
        case 'o':   return "\xc3\xb6";
        case 'p':   return "\xc3\xbe";
        case 'q':   return "\x51";
        case 'r':   return "\xc5\x95";
        case 's':   return "\xc5\xa1";
        case 't':   return "\xc5\xa3";
        case 'u':   return "\xc3\xbb";
        case 'v':   return "\x56";
        case 'w':   return "\xc5\xb5";
        case 'x':   return "\xd1\x85";
        case 'y':   return "\xc3\xbd";
        case 'z':   return "\xc5\xbe";
        case 'A':   return "\xc3\x85";
        case 'B':   return "\xce\xb2";
        case 'C':   return "\xc3\x87";
        case 'D':   return "\xc3\x90";
        case 'E':   return "\xc3\x89";
        // U+0191, the capital counterpart of the U+0192 used for 'f'.
        case 'F':   return "\xc6\x91";
        case 'G':   return "\xc4\x9c";
        case 'H':   return "\xc4\xa4";
        case 'I':   return "\xc3\x8e";
        case 'J':   return "\xc4\xb4";
        case 'K':   return "\xc4\xb6";
        case 'L':   return "\xc4\xbb";
        case 'M':   return "\xe1\xb8\xbe";
        case 'N':   return "\xc3\x91";
        case 'O':   return "\xc3\x96";
        case 'P':   return "\xc3\x9e";
        case 'Q':   return "\x71";
        case 'R':   return "\xc5\x94";
        case 'S':   return "\xc5\xa0";
        case 'T':   return "\xc5\xa2";
        case 'U':   return "\xc3\x9b";
        case 'V':   return "\xce\xbd";
        case 'W':   return "\xc5\xb4";
        case 'X':   return "\xc3\x97";
        case 'Y':   return "\xc3\x9d";
        case 'Z':   return "\xc5\xbd";
        case '!':   return "\xc2\xa1";
        case '?':   return "\xc2\xbf";
        case '$':   return "\xe2\x82\xac";
        default:    return NULL;
    }
}
| 80 | |
/**
 * Tells whether |c| is one of the characters that ends an ordinary
 * placeholder: s S c C d o x X f e E g G a A b B h H % n.
 *
 * 't' is deliberately not in this set; it returns false here.
 */
static bool
is_possible_normal_placeholder_end(const char16_t c) {
    switch (c) {
        case 's': case 'S':
        case 'c': case 'C':
        case 'd':
        case 'o':
        case 'x': case 'X':
        case 'f':
        case 'e': case 'E':
        case 'g': case 'G':
        case 'a': case 'A':
        case 'b': case 'B':
        case 'h': case 'H':
        case '%':
        case 'n':
            return true;
        default:
            return false;
    }
}
| 108 | |
| 109 | String16 |
| 110 | pseudo_generate_expansion(const unsigned int length) { |
| 111 | String16 result = k_expansion_string; |
| 112 | const char16_t* s = result.string(); |
| 113 | if (result.size() < length) { |
| 114 | result += String16(" "); |
| 115 | result += pseudo_generate_expansion(length - result.size()); |
| 116 | } else { |
| 117 | int ext = 0; |
| 118 | // Should contain only whole words, so looking for a space |
| 119 | for (unsigned int i = length + 1; i < result.size(); ++i) { |
| 120 | ++ext; |
| 121 | if (s[i] == ' ') { |
| 122 | break; |
| 123 | } |
| 124 | } |
| 125 | result.remove(length + ext, 0); |
| 126 | } |
| 127 | return result; |
| 128 | } |
| 129 | |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 130 | /** |
| 131 | * Converts characters so they look like they've been localized. |
| 132 | * |
| 133 | * Note: This leaves escape sequences untouched so they can later be |
| 134 | * processed by ResTable::collectString in the normal way. |
| 135 | */ |
Anton Krumin | a2ef5c0 | 2014-03-12 14:46:44 -0700 | [diff] [blame] | 136 | String16 |
| 137 | pseudolocalize_string(const String16& source) |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 138 | { |
Anton Krumin | a2ef5c0 | 2014-03-12 14:46:44 -0700 | [diff] [blame] | 139 | const char16_t* s = source.string(); |
| 140 | String16 result; |
| 141 | const size_t I = source.size(); |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 142 | for (size_t i=0; i<I; i++) { |
Anton Krumin | a2ef5c0 | 2014-03-12 14:46:44 -0700 | [diff] [blame] | 143 | char16_t c = s[i]; |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 144 | if (c == '\\') { |
Anton Krumin | a2ef5c0 | 2014-03-12 14:46:44 -0700 | [diff] [blame] | 145 | // Escape syntax, no need to pseudolocalize |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 146 | if (i<I-1) { |
Anton Krumin | a2ef5c0 | 2014-03-12 14:46:44 -0700 | [diff] [blame] | 147 | result += String16("\\"); |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 148 | i++; |
| 149 | c = s[i]; |
| 150 | switch (c) { |
| 151 | case 'u': |
| 152 | // this one takes up 5 chars |
Anton Krumin | a2ef5c0 | 2014-03-12 14:46:44 -0700 | [diff] [blame] | 153 | result += String16(s+i, 5); |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 154 | i += 4; |
| 155 | break; |
| 156 | case 't': |
| 157 | case 'n': |
| 158 | case '#': |
| 159 | case '@': |
| 160 | case '?': |
| 161 | case '"': |
| 162 | case '\'': |
| 163 | case '\\': |
| 164 | default: |
Anton Krumin | a2ef5c0 | 2014-03-12 14:46:44 -0700 | [diff] [blame] | 165 | result.append(&c, 1); |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 166 | break; |
| 167 | } |
| 168 | } else { |
Anton Krumin | a2ef5c0 | 2014-03-12 14:46:44 -0700 | [diff] [blame] | 169 | result.append(&c, 1); |
| 170 | } |
| 171 | } else if (c == '%') { |
| 172 | // Placeholder syntax, no need to pseudolocalize |
| 173 | result += k_placeholder_open; |
| 174 | bool end = false; |
| 175 | result.append(&c, 1); |
| 176 | while (!end && i < I) { |
| 177 | ++i; |
| 178 | c = s[i]; |
| 179 | result.append(&c, 1); |
| 180 | if (is_possible_normal_placeholder_end(c)) { |
| 181 | end = true; |
| 182 | } else if (c == 't') { |
| 183 | ++i; |
| 184 | c = s[i]; |
| 185 | result.append(&c, 1); |
| 186 | end = true; |
| 187 | } |
| 188 | } |
| 189 | result += k_placeholder_close; |
| 190 | } else if (c == '<' || c == '&') { |
| 191 | // html syntax, no need to pseudolocalize |
| 192 | bool tag_closed = false; |
| 193 | while (!tag_closed && i < I) { |
| 194 | if (c == '&') { |
| 195 | String16 escape_text; |
| 196 | escape_text.append(&c, 1); |
| 197 | bool end = false; |
| 198 | size_t htmlCodePos = i; |
| 199 | while (!end && htmlCodePos < I) { |
| 200 | ++htmlCodePos; |
| 201 | c = s[htmlCodePos]; |
| 202 | escape_text.append(&c, 1); |
| 203 | // Valid html code |
| 204 | if (c == ';') { |
| 205 | end = true; |
| 206 | i = htmlCodePos; |
| 207 | } |
| 208 | // Wrong html code |
| 209 | else if (!((c == '#' || |
| 210 | (c >= 'a' && c <= 'z') || |
| 211 | (c >= 'A' && c <= 'Z') || |
| 212 | (c >= '0' && c <= '9')))) { |
| 213 | end = true; |
| 214 | } |
| 215 | } |
| 216 | result += escape_text; |
| 217 | if (escape_text != String16("<")) { |
| 218 | tag_closed = true; |
| 219 | } |
| 220 | continue; |
| 221 | } |
| 222 | if (c == '>') { |
| 223 | tag_closed = true; |
| 224 | result.append(&c, 1); |
| 225 | continue; |
| 226 | } |
| 227 | result.append(&c, 1); |
| 228 | i++; |
| 229 | c = s[i]; |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 230 | } |
| 231 | } else { |
Anton Krumin | a2ef5c0 | 2014-03-12 14:46:44 -0700 | [diff] [blame] | 232 | // This is a pure text that should be pseudolocalized |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 233 | const char* p = pseudolocalize_char(c); |
| 234 | if (p != NULL) { |
Anton Krumin | a2ef5c0 | 2014-03-12 14:46:44 -0700 | [diff] [blame] | 235 | result += String16(p); |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 236 | } else { |
Anton Krumin | a2ef5c0 | 2014-03-12 14:46:44 -0700 | [diff] [blame] | 237 | result.append(&c, 1); |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 238 | } |
| 239 | } |
| 240 | } |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 241 | return result; |
| 242 | } |
| 243 | |
Anton Krumin | a2ef5c0 | 2014-03-12 14:46:44 -0700 | [diff] [blame] | 244 | String16 |
| 245 | pseudobidi_string(const String16& source) |
| 246 | { |
| 247 | const char16_t* s = source.string(); |
| 248 | String16 result; |
| 249 | result += k_rlm; |
| 250 | result += k_rlo; |
| 251 | for (size_t i=0; i<source.size(); i++) { |
| 252 | char16_t c = s[i]; |
| 253 | switch(c) { |
| 254 | case ' ': result += k_pdf; |
| 255 | result += k_rlm; |
| 256 | result.append(&c, 1); |
| 257 | result += k_rlm; |
| 258 | result += k_rlo; |
| 259 | break; |
| 260 | default: result.append(&c, 1); |
| 261 | break; |
| 262 | } |
| 263 | } |
| 264 | result += k_pdf; |
| 265 | result += k_rlm; |
| 266 | return result; |
| 267 | } |
Bjorn Bringert | fb903a4 | 2013-03-18 21:17:26 +0000 | [diff] [blame] | 268 | |