Kernel utf-8 handling
This patch fixes dead keys and copy/paste of non-ASCII characters in UTF-8
mode on Linux console. See more details about the original patch at:
http://chris.heathens.co.nz/linux/utf8.html
Already posted on
(Oldest) http://lkml.org/lkml/2003/5/31/148
http://lkml.org/lkml/2005/12/24/69
(Recent) http://lkml.org/lkml/2006/8/7/75
[bunk@stusta.de: make drivers/char/selection.c:store_utf8() static]
Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Cc: Alexander E. Patrakov <patrakov@ums.usu.ru>
Cc: Dmitry Torokhov <dtor@mail.ru>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c
index 90965b4..2ce0af1 100644
--- a/drivers/char/keyboard.c
+++ b/drivers/char/keyboard.c
@@ -24,6 +24,7 @@
* 21-08-02: Converted to input API, major cleanup. (Vojtech Pavlik)
*/
+#include <linux/consolemap.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/tty.h>
@@ -308,10 +309,9 @@
* Many other routines do put_queue, but I think either
* they produce ASCII, or they produce some user-assigned
* string, and in both cases we might assume that it is
- * in utf-8 already. UTF-8 is defined for words of up to 31 bits,
- * but we need only 16 bits here
+ * in utf-8 already.
*/
-static void to_utf8(struct vc_data *vc, ushort c)
+static void to_utf8(struct vc_data *vc, uint c)
{
if (c < 0x80)
/* 0******* */
@@ -320,11 +320,21 @@
/* 110***** 10****** */
put_queue(vc, 0xc0 | (c >> 6));
put_queue(vc, 0x80 | (c & 0x3f));
- } else {
+ } else if (c < 0x10000) {
+ if (c >= 0xD800 && c < 0xE000)
+ return;
+ if (c == 0xFFFF)
+ return;
/* 1110**** 10****** 10****** */
put_queue(vc, 0xe0 | (c >> 12));
put_queue(vc, 0x80 | ((c >> 6) & 0x3f));
put_queue(vc, 0x80 | (c & 0x3f));
+ } else if (c < 0x110000) {
+ /* 11110*** 10****** 10****** 10****** */
+ put_queue(vc, 0xf0 | (c >> 18));
+ put_queue(vc, 0x80 | ((c >> 12) & 0x3f));
+ put_queue(vc, 0x80 | ((c >> 6) & 0x3f));
+ put_queue(vc, 0x80 | (c & 0x3f));
}
}
@@ -393,7 +403,7 @@
return d;
if (kbd->kbdmode == VC_UNICODE)
- to_utf8(vc, d);
+ to_utf8(vc, conv_8bit_to_uni(d));
else if (d < 0x100)
put_queue(vc, d);
@@ -407,7 +417,7 @@
{
if (diacr) {
if (kbd->kbdmode == VC_UNICODE)
- to_utf8(vc, diacr);
+ to_utf8(vc, conv_8bit_to_uni(diacr));
else if (diacr < 0x100)
put_queue(vc, diacr);
diacr = 0;
@@ -617,7 +627,7 @@
return;
}
if (kbd->kbdmode == VC_UNICODE)
- to_utf8(vc, value);
+ to_utf8(vc, conv_8bit_to_uni(value));
else if (value < 0x100)
put_queue(vc, value);
}
@@ -775,7 +785,7 @@
/* kludge */
if (up_flag && shift_state != old_state && npadch != -1) {
if (kbd->kbdmode == VC_UNICODE)
- to_utf8(vc, npadch & 0xffff);
+ to_utf8(vc, npadch);
else
put_queue(vc, npadch & 0xff);
npadch = -1;