blob: 0402be2ba9713d7f17db4206219b08011c5cc0cb [file] [log] [blame]
Torne (Richard Coles)58218062012-11-14 11:43:16 +00001// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "ui/base/ime/character_composer.h"
6
Torne (Richard Coles)90dce4d2013-05-29 14:40:03 +01007#include <X11/Xlib.h>
8
Torne (Richard Coles)58218062012-11-14 11:43:16 +00009#include <algorithm>
10#include <iterator>
11
Torne (Richard Coles)868fa2f2013-06-11 10:57:03 +010012#include "base/strings/utf_string_conversions.h"
Torne (Richard Coles)58218062012-11-14 11:43:16 +000013#include "base/third_party/icu/icu_utf.h"
Torne (Richard Coles)58218062012-11-14 11:43:16 +000014// Note for Gtk removal: gdkkeysyms.h only contains a set of
15// '#define GDK_KeyName 0xNNNN' macros and does not #include any Gtk headers.
16#include "third_party/gtk+/gdk/gdkkeysyms.h"
17#include "ui/base/events/event_constants.h"
18#include "ui/base/glib/glib_integers.h"
Torne (Richard Coles)90dce4d2013-05-29 14:40:03 +010019#include "ui/base/x/x11_util.h"
Torne (Richard Coles)58218062012-11-14 11:43:16 +000020
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +000021// Note for Gtk removal: gtkimcontextsimpleseqs.h does not #include any Gtk
22// headers and only contains one big guint16 array |gtk_compose_seqs_compact|
23// which defines the main compose table. The table has internal linkage.
24// The order of header inclusion is out of order because
25// gtkimcontextsimpleseqs.h depends on guint16, which is defined in
26// "ui/base/glib/glib_integers.h".
27#include "third_party/gtk+/gtk/gtkimcontextsimpleseqs.h"
28
Torne (Richard Coles)58218062012-11-14 11:43:16 +000029namespace {
30
Ben Murdoch7dbb3d52013-07-17 14:55:54 +010031// A black list for not composing dead keys. Once the key combination is listed
32// below, the dead key won't work even when this is listed in
33// gtkimcontextsimpleseqs.h. This only supports two keyevent sequenses.
34// TODO(nona): Remove this hack.
35const struct BlackListedDeadKey {
36 uint32 first_key; // target first key event.
37 uint32 second_key; // target second key event.
38 uint32 output_char; // the character to be inserted if the filter is matched.
39 bool consume; // true if the original key event will be consumed.
40} kBlackListedDeadKeys[] = {
Ben Murdochbb1529c2013-08-08 10:24:53 +010041 { GDK_KEY_dead_acute, GDK_KEY_m, GDK_KEY_apostrophe, false },
Ben Murdoch7dbb3d52013-07-17 14:55:54 +010042 { GDK_KEY_dead_acute, GDK_KEY_s, GDK_KEY_apostrophe, false },
43 { GDK_KEY_dead_acute, GDK_KEY_t, GDK_KEY_apostrophe, false },
Ben Murdochbb1529c2013-08-08 10:24:53 +010044 { GDK_KEY_dead_acute, GDK_KEY_v, GDK_KEY_apostrophe, false },
Ben Murdoch7dbb3d52013-07-17 14:55:54 +010045 { GDK_KEY_dead_acute, GDK_KEY_dead_acute, GDK_KEY_apostrophe, true },
46};
47
// Buffer of key values that is matched against the composition tables.
typedef std::vector<unsigned int> ComposeBufferType;
49
50// An iterator class to apply std::lower_bound for composition table.
51class SequenceIterator
52 : public std::iterator<std::random_access_iterator_tag, const uint16*> {
53 public:
54 SequenceIterator() : ptr_(NULL), stride_(0) {}
55 SequenceIterator(const uint16* ptr, int stride)
56 : ptr_(ptr), stride_(stride) {}
57
58 const uint16* ptr() const {return ptr_;}
59 int stride() const {return stride_;}
60
61 SequenceIterator& operator++() {
62 ptr_ += stride_;
63 return *this;
64 }
65 SequenceIterator& operator+=(int n) {
66 ptr_ += stride_*n;
67 return *this;
68 }
69
70 const uint16* operator*() const {return ptr_;}
71
72 private:
73 const uint16* ptr_;
74 int stride_;
75};
76
77inline SequenceIterator operator+(const SequenceIterator& l, int r) {
78 return SequenceIterator(l) += r;
79}
80
81inline int operator-(const SequenceIterator& l, const SequenceIterator& r) {
82 const int d = l.ptr() - r.ptr();
83 DCHECK(l.stride() == r.stride() && l.stride() > 0 && d%l.stride() == 0);
84 return d/l.stride();
85}
86
87inline bool operator==(const SequenceIterator& l, const SequenceIterator& r) {
88 DCHECK(l.stride() == r.stride());
89 return l.ptr() == r.ptr();
90}
91
92inline bool operator!=(const SequenceIterator& l, const SequenceIterator& r) {
93 return !(l == r);
94}
95
// Three-way comparison of two key values.
// Returns -1 when |l| < |r|, 1 when |l| > |r| and 0 when they are equal.
inline int CompareSequenceValue(unsigned int l, unsigned int r) {
  if (l < r)
    return -1;
  if (l > r)
    return 1;
  return 0;
}
100
101// A template to make |CompareFunc| work like operator<.
102// |CompareFunc| is required to implement a member function,
103// int operator()(const ComposeBufferType& l, const uint16* r) const.
104template<typename CompareFunc>
105struct ComparatorAdoptor {
106 bool operator()(const ComposeBufferType& l, const uint16* r) const {
107 return CompareFunc()(l, r) == -1;
108 }
109 bool operator()(const uint16* l, const ComposeBufferType& r) const {
110 return CompareFunc()(r, l) == 1;
111 }
112};
113
114class ComposeChecker {
115 public:
116 // This class does not take the ownership of |data|, |data| should be alive
117 // for the lifetime of the object.
118 // |data| is a pointer to the head of an array of
119 // length (|max_sequence_length| + 2)*|n_sequences|.
120 // Every (|max_sequence_length| + 2) elements of |data| represent an entry.
121 // First |max_sequence_length| elements of an entry is the sequecne which
122 // composes the character represented by the last two elements of the entry.
123 ComposeChecker(const uint16* data, int max_sequence_length, int n_sequences);
124 bool CheckSequence(const ComposeBufferType& sequence,
125 uint32* composed_character) const;
126
127 private:
128 struct CompareSequence {
129 int operator()(const ComposeBufferType& l, const uint16* r) const;
130 };
131
132 // This class does not take the ownership of |data_|,
133 // the dtor does not delete |data_|.
134 const uint16* data_;
135 int max_sequence_length_;
136 int n_sequences_;
137 int row_stride_;
138
139 DISALLOW_COPY_AND_ASSIGN(ComposeChecker);
140};
141
142ComposeChecker::ComposeChecker(const uint16* data,
143 int max_sequence_length,
144 int n_sequences)
145 : data_(data),
146 max_sequence_length_(max_sequence_length),
147 n_sequences_(n_sequences),
148 row_stride_(max_sequence_length + 2) {
149}
150
151bool ComposeChecker::CheckSequence(const ComposeBufferType& sequence,
152 uint32* composed_character) const {
153 const int sequence_length = sequence.size();
154 if (sequence_length > max_sequence_length_)
155 return false;
156 // Find sequence in the table.
157 const SequenceIterator begin(data_, row_stride_);
158 const SequenceIterator end = begin + n_sequences_;
159 const SequenceIterator found = std::lower_bound(
160 begin, end, sequence, ComparatorAdoptor<CompareSequence>());
161 if (found == end || CompareSequence()(sequence, *found) != 0)
162 return false;
163
164 if (sequence_length == max_sequence_length_ ||
165 (*found)[sequence_length] == 0) {
166 // |found| is not partially matching. It's fully matching.
167 if (found + 1 == end ||
168 CompareSequence()(sequence, *(found + 1)) != 0) {
169 // There is no composition longer than |found| which matches to
170 // |sequence|.
171 const uint32 value = ((*found)[max_sequence_length_] << 16) |
172 (*found)[max_sequence_length_ + 1];
173 *composed_character = value;
174 }
175 }
176 return true;
177}
178
179int ComposeChecker::CompareSequence::operator()(const ComposeBufferType& l,
180 const uint16* r) const {
181 for(size_t i = 0; i < l.size(); ++i) {
182 const int compare_result = CompareSequenceValue(l[i], r[i]);
183 if(compare_result)
184 return compare_result;
185 }
186 return 0;
187}
188
189
190class ComposeCheckerWithCompactTable {
191 public:
192 // This class does not take the ownership of |data|, |data| should be alive
193 // for the lifetime of the object.
194 // First |index_size|*|index_stride| elements of |data| are an index table.
195 // Every |index_stride| elements of an index table are an index entry.
196 // If you are checking with a sequence of length N beginning with character C,
197 // you have to find an index entry whose first element is C, then get the N-th
198 // element of the index entry as the index.
199 // The index is pointing the element of |data| where the composition table for
200 // sequences of length N beginning with C is placed.
201
202 ComposeCheckerWithCompactTable(const uint16* data,
203 int max_sequence_length,
204 int index_size,
205 int index_stride);
206 bool CheckSequence(const ComposeBufferType& sequence,
207 uint32* composed_character) const;
208
209 private:
210 struct CompareSequenceFront {
211 int operator()(const ComposeBufferType& l, const uint16* r) const;
212 };
213 struct CompareSequenceSkipFront {
214 int operator()(const ComposeBufferType& l, const uint16* r) const;
215 };
216
217 // This class does not take the ownership of |data_|,
218 // the dtor does not delete |data_|.
219 const uint16* data_;
220 int max_sequence_length_;
221 int index_size_;
222 int index_stride_;
223};
224
225ComposeCheckerWithCompactTable::ComposeCheckerWithCompactTable(
226 const uint16* data,
227 int max_sequence_length,
228 int index_size,
229 int index_stride)
230 : data_(data),
231 max_sequence_length_(max_sequence_length),
232 index_size_(index_size),
233 index_stride_(index_stride) {
234}
235
236bool ComposeCheckerWithCompactTable::CheckSequence(
237 const ComposeBufferType& sequence,
238 uint32* composed_character) const {
239 const int compose_length = sequence.size();
240 if (compose_length > max_sequence_length_)
241 return false;
242 // Find corresponding index for the first keypress.
243 const SequenceIterator index_begin(data_, index_stride_);
244 const SequenceIterator index_end = index_begin + index_size_;
245 const SequenceIterator index =
246 std::lower_bound(index_begin, index_end, sequence,
247 ComparatorAdoptor<CompareSequenceFront>());
248 if (index == index_end || CompareSequenceFront()(sequence, *index) != 0)
249 return false;
250 if (compose_length == 1)
251 return true;
252 // Check for composition sequences.
253 for (int length = compose_length - 1; length < max_sequence_length_;
254 ++length) {
255 const uint16* table = data_ + (*index)[length];
256 const uint16* table_next = data_ + (*index)[length + 1];
257 if (table_next > table) {
258 // There are composition sequences for this |length|.
259 const int row_stride = length + 1;
260 const int n_sequences = (table_next - table)/row_stride;
261 const SequenceIterator table_begin(table, row_stride);
262 const SequenceIterator table_end = table_begin + n_sequences;
263 const SequenceIterator found =
264 std::lower_bound(table_begin, table_end, sequence,
265 ComparatorAdoptor<CompareSequenceSkipFront>());
266 if (found != table_end &&
267 CompareSequenceSkipFront()(sequence, *found) == 0) {
268 if (length == compose_length - 1) // Exact match.
269 *composed_character = (*found)[length];
270 return true;
271 }
272 }
273 }
274 return false;
275}
276
277int ComposeCheckerWithCompactTable::CompareSequenceFront::operator()(
278 const ComposeBufferType& l, const uint16* r) const {
279 return CompareSequenceValue(l[0], r[0]);
280}
281
282int ComposeCheckerWithCompactTable::CompareSequenceSkipFront::operator()(
283 const ComposeBufferType& l, const uint16* r) const {
284 for(size_t i = 1; i < l.size(); ++i) {
285 const int compare_result = CompareSequenceValue(l[i], r[i - 1]);
286 if(compare_result)
287 return compare_result;
288 }
289 return 0;
290}
291
292
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000293// Additional table.
294
295// The difference between this and the default input method is the handling
296// of C+acute - this method produces C WITH CEDILLA rather than C WITH ACUTE.
297// For languages that use CCedilla and not acute, this is the preferred mapping,
298// and is particularly important for pt_BR, where the us-intl keyboard is
299// used extensively.
300
301const uint16 cedilla_compose_seqs[] = {
302 // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
303 GDK_KEY_dead_acute, GDK_KEY_C, 0, 0, 0, 0x00C7,
304 // LATIN_SMALL_LETTER_C_WITH_CEDILLA
305 GDK_KEY_dead_acute, GDK_KEY_c, 0, 0, 0, 0x00E7,
306 // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
307 GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_C, 0, 0, 0x00C7,
308 // LATIN_SMALL_LETTER_C_WITH_CEDILLA
309 GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_c, 0, 0, 0x00E7,
310 // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
311 GDK_KEY_Multi_key, GDK_KEY_C, GDK_KEY_apostrophe, 0, 0, 0x00C7,
312 // LATIN_SMALL_LETTER_C_WITH_CEDILLA
313 GDK_KEY_Multi_key, GDK_KEY_c, GDK_KEY_apostrophe, 0, 0, 0x00E7,
314};
315
316bool KeypressShouldBeIgnored(unsigned int keyval) {
317 switch(keyval) {
318 case GDK_KEY_Shift_L:
319 case GDK_KEY_Shift_R:
320 case GDK_KEY_Control_L:
321 case GDK_KEY_Control_R:
322 case GDK_KEY_Caps_Lock:
323 case GDK_KEY_Shift_Lock:
324 case GDK_KEY_Meta_L:
325 case GDK_KEY_Meta_R:
326 case GDK_KEY_Alt_L:
327 case GDK_KEY_Alt_R:
328 case GDK_KEY_Super_L:
329 case GDK_KEY_Super_R:
330 case GDK_KEY_Hyper_L:
331 case GDK_KEY_Hyper_R:
332 case GDK_KEY_Mode_switch:
333 case GDK_KEY_ISO_Level3_Shift:
334 return true;
335 default:
336 return false;
337 }
338}
339
340bool CheckCharacterComposeTable(const ComposeBufferType& sequence,
341 uint32* composed_character) {
342 // Check cedilla compose table.
343 const ComposeChecker kCedillaComposeChecker(
344 cedilla_compose_seqs, 4, arraysize(cedilla_compose_seqs)/(4 + 2));
345 if (kCedillaComposeChecker.CheckSequence(sequence, composed_character))
346 return true;
347
348 // Check main compose table.
349 const ComposeCheckerWithCompactTable kMainComposeChecker(
350 gtk_compose_seqs_compact, 5, 24, 6);
351 if (kMainComposeChecker.CheckSequence(sequence, composed_character))
352 return true;
353
354 return false;
355}
356
357// Converts |character| to UTF16 string.
358// Returns false when |character| is not a valid character.
359bool UTF32CharacterToUTF16(uint32 character, string16* output) {
360 output->clear();
361 // Reject invalid character. (e.g. codepoint greater than 0x10ffff)
362 if (!CBU_IS_UNICODE_CHAR(character))
363 return false;
364 if (character) {
365 output->resize(CBU16_LENGTH(character));
366 size_t i = 0;
367 CBU16_APPEND_UNSAFE(&(*output)[0], i, character);
368 }
369 return true;
370}
371
Torne (Richard Coles)90dce4d2013-05-29 14:40:03 +0100372// Converts a X keycode to a X keysym with no modifiers.
373KeySym XKeyCodeToXKeySym(unsigned int keycode) {
374 Display* display = ui::GetXDisplay();
375 if (!display)
376 return NoSymbol;
377
378 XKeyEvent x_key_event = {0};
379 x_key_event.type = KeyPress;
380 x_key_event.display = display;
381 x_key_event.keycode = keycode;
382 return ::XLookupKeysym(&x_key_event, 0);
383}
384
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000385// Returns an hexadecimal digit integer (0 to 15) corresponding to |keyval|.
386// -1 is returned when |keyval| cannot be a hexadecimal digit.
387int KeyvalToHexDigit(unsigned int keyval) {
388 if (GDK_KEY_0 <= keyval && keyval <= GDK_KEY_9)
389 return keyval - GDK_KEY_0;
390 if (GDK_KEY_a <= keyval && keyval <= GDK_KEY_f)
391 return keyval - GDK_KEY_a + 10;
392 if (GDK_KEY_A <= keyval && keyval <= GDK_KEY_F)
393 return keyval - GDK_KEY_A + 10;
394 return -1; // |keyval| cannot be a hexadecimal digit.
395}
396
397} // namespace
398
399namespace ui {
400
401CharacterComposer::CharacterComposer() : composition_mode_(KEY_SEQUENCE_MODE) {}
402
403CharacterComposer::~CharacterComposer() {}
404
405void CharacterComposer::Reset() {
406 compose_buffer_.clear();
407 composed_character_.clear();
408 preedit_string_.clear();
409 composition_mode_ = KEY_SEQUENCE_MODE;
410}
411
412bool CharacterComposer::FilterKeyPress(unsigned int keyval,
Torne (Richard Coles)90dce4d2013-05-29 14:40:03 +0100413 unsigned int keycode,
414 int flags) {
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000415 composed_character_.clear();
416 preedit_string_.clear();
417
418 // We don't care about modifier key presses.
419 if(KeypressShouldBeIgnored(keyval))
420 return false;
421
422 // When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE.
423 // We don't care about other modifiers like Alt. When CapsLock is down, we
424 // do nothing because what we receive is Ctrl+Shift+u (not U).
425 if (keyval == GDK_KEY_U && (flags & EF_SHIFT_DOWN) &&
426 (flags & EF_CONTROL_DOWN)) {
427 if (composition_mode_ == KEY_SEQUENCE_MODE && compose_buffer_.empty()) {
428 // There is no ongoing composition. Let's switch to HEX_MODE.
429 composition_mode_ = HEX_MODE;
430 UpdatePreeditStringHexMode();
431 return true;
432 }
433 }
434
435 // Filter key press in an appropriate manner.
436 switch (composition_mode_) {
437 case KEY_SEQUENCE_MODE:
Torne (Richard Coles)90dce4d2013-05-29 14:40:03 +0100438 return FilterKeyPressSequenceMode(keyval, keycode, flags);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000439 case HEX_MODE:
Torne (Richard Coles)90dce4d2013-05-29 14:40:03 +0100440 return FilterKeyPressHexMode(keyval, keycode, flags);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000441 default:
442 NOTREACHED();
443 return false;
444 }
445}
446
447bool CharacterComposer::FilterKeyPressSequenceMode(unsigned int keyval,
Torne (Richard Coles)90dce4d2013-05-29 14:40:03 +0100448 unsigned int keycode,
449 int flags) {
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000450 DCHECK(composition_mode_ == KEY_SEQUENCE_MODE);
451 compose_buffer_.push_back(keyval);
452
Ben Murdoch7dbb3d52013-07-17 14:55:54 +0100453 if (compose_buffer_.size() == 2U) {
454 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kBlackListedDeadKeys); ++i) {
455 if (compose_buffer_[0] == kBlackListedDeadKeys[i].first_key &&
456 compose_buffer_[1] == kBlackListedDeadKeys[i].second_key ) {
457 Reset();
458 composed_character_.push_back(kBlackListedDeadKeys[i].output_char);
459 return kBlackListedDeadKeys[i].consume;
460 }
461 }
462 }
463
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000464 // Check compose table.
465 uint32 composed_character_utf32 = 0;
466 if (CheckCharacterComposeTable(compose_buffer_, &composed_character_utf32)) {
467 // Key press is recognized as a part of composition.
468 if (composed_character_utf32 != 0) {
469 // We get a composed character.
470 compose_buffer_.clear();
471 UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
472 }
473 return true;
474 }
475 // Key press is not a part of composition.
476 compose_buffer_.pop_back(); // Remove the keypress added this time.
477 if (!compose_buffer_.empty()) {
478 compose_buffer_.clear();
479 return true;
480 }
481 return false;
482}
483
484bool CharacterComposer::FilterKeyPressHexMode(unsigned int keyval,
Torne (Richard Coles)90dce4d2013-05-29 14:40:03 +0100485 unsigned int keycode,
486 int flags) {
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000487 DCHECK(composition_mode_ == HEX_MODE);
488 const size_t kMaxHexSequenceLength = 8;
Torne (Richard Coles)90dce4d2013-05-29 14:40:03 +0100489 int hex_digit = KeyvalToHexDigit(keyval);
490 if (hex_digit < 0) {
491 // With 101 keyboard, control + shift + 3 produces '#', but a user may
492 // have intended to type '3'. So, if a hexadecimal character was not found,
493 // suppose a user is holding shift key (and possibly control key, too) and
494 // try a character with modifier keys removed.
495 hex_digit = KeyvalToHexDigit(XKeyCodeToXKeySym(keycode));
496 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000497
498 if (keyval == GDK_KEY_Escape) {
499 // Cancel composition when ESC is pressed.
500 Reset();
501 } else if (keyval == GDK_KEY_Return || keyval == GDK_KEY_KP_Enter ||
502 keyval == GDK_KEY_ISO_Enter ||
503 keyval == GDK_KEY_space || keyval == GDK_KEY_KP_Space) {
504 // Commit the composed character when Enter or space is pressed.
505 CommitHex();
506 } else if (keyval == GDK_KEY_BackSpace) {
507 // Pop back the buffer when Backspace is pressed.
508 if (!compose_buffer_.empty()) {
509 compose_buffer_.pop_back();
510 } else {
511 // If there is no character in |compose_buffer_|, cancel composition.
512 Reset();
513 }
514 } else if (hex_digit >= 0 &&
515 compose_buffer_.size() < kMaxHexSequenceLength) {
516 // Add the key to the buffer if it is a hex digit.
517 compose_buffer_.push_back(hex_digit);
518 }
519
520 UpdatePreeditStringHexMode();
521
522 return true;
523}
524
525void CharacterComposer::CommitHex() {
526 DCHECK(composition_mode_ == HEX_MODE);
527 uint32 composed_character_utf32 = 0;
528 for (size_t i = 0; i != compose_buffer_.size(); ++i) {
529 const uint32 digit = compose_buffer_[i];
530 DCHECK(0 <= digit && digit < 16);
531 composed_character_utf32 <<= 4;
532 composed_character_utf32 |= digit;
533 }
534 Reset();
535 UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
536}
537
538void CharacterComposer::UpdatePreeditStringHexMode() {
539 if (composition_mode_ != HEX_MODE) {
540 preedit_string_.clear();
541 return;
542 }
543 std::string preedit_string_ascii("u");
544 for (size_t i = 0; i != compose_buffer_.size(); ++i) {
545 const int digit = compose_buffer_[i];
546 DCHECK(0 <= digit && digit < 16);
547 preedit_string_ascii += digit <= 9 ? ('0' + digit) : ('a' + (digit - 10));
548 }
549 preedit_string_ = ASCIIToUTF16(preedit_string_ascii);
550}
551
552} // namespace ui