blob: 3aeb500ba6ff9a4c77f99d7e21249d05e8afdb2a [file] [log] [blame]
/*
 * _johab.c: the Johab codec
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 * $CJKCodecs: _johab.c,v 1.3 2003/12/31 05:46:55 perky Exp $
 */
7
8#include "codeccommon.h"
9
/*
 * Mapping tables referenced by this codec.  The encoder looks characters up
 * through TRYMAP_ENC(cp949, ...) and the decoder through
 * TRYMAP_DEC(ksx1001, ...); both maps are imported in the codec registry at
 * the end of this file.
 * NOTE(review): ENCMAP/DECMAP come from codeccommon.h and presumably declare
 * the storage for those maps -- confirm against the header.
 */
ENCMAP(cp949)
DECMAP(ksx1001)
12
/*
 * Encoder table for the leading field of a Johab syllable.
 * Indexed by (syllable - 0xAC00) / 588; the value is OR-ed into the
 * output code shifted left by 10.  Only the first 19 slots are populated;
 * the rest of the 32-entry array is implicitly zero.
 */
static const unsigned char u2johabidx_choseong[32] = {
    /*  0 */ 0x02, 0x03, 0x04, 0x05,
    /*  4 */ 0x06, 0x07, 0x08, 0x09,
    /*  8 */ 0x0a, 0x0b, 0x0c, 0x0d,
    /* 12 */ 0x0e, 0x0f, 0x10, 0x11,
    /* 16 */ 0x12, 0x13, 0x14,
};
/*
 * Encoder table for the middle field of a Johab syllable.
 * Indexed by ((syllable - 0xAC00) / 28) % 21; the value is OR-ed into the
 * output code shifted left by 5.  The values skip 0x08-0x09, 0x10-0x11 and
 * 0x18-0x19.  Only the first 21 slots are populated; the rest of the
 * 32-entry array is implicitly zero.
 */
static const unsigned char u2johabidx_jungseong[32] = {
    /*  0 */ 0x03, 0x04, 0x05, 0x06, 0x07,
    /*  5 */ 0x0a, 0x0b, 0x0c,
    /*  8 */ 0x0d, 0x0e, 0x0f,
    /* 11 */ 0x12, 0x13, 0x14,
    /* 14 */ 0x15, 0x16, 0x17,
    /* 17 */ 0x1a, 0x1b, 0x1c, 0x1d,
};
/*
 * Encoder table for the trailing field of a Johab syllable.
 * Indexed by (syllable - 0xAC00) % 28; the value is OR-ed into the low
 * five bits of the output code.  The value 0x12 is skipped.  Only the
 * first 28 slots are populated; the rest of the 32-entry array is
 * implicitly zero.
 */
static const unsigned char u2johabidx_jongseong[32] = {
    /*  0 */ 0x01, 0x02, 0x03, 0x04,
    /*  4 */ 0x05, 0x06, 0x07, 0x08,
    /*  8 */ 0x09, 0x0a, 0x0b, 0x0c,
    /* 12 */ 0x0d, 0x0e, 0x0f, 0x10,
    /* 16 */ 0x11, 0x13, 0x14, 0x15,
    /* 20 */ 0x16, 0x17, 0x18, 0x19,
    /* 24 */ 0x1a, 0x1b, 0x1c, 0x1d,
};
30static const DBCHAR u2johabjamo[] = {
31 0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
32 0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
33 0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
34 0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
35 0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
36 0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
37 0x8741, 0x8761, 0x8781, 0x87a1,
38};
39
40ENCODER(johab)
41{
42 while (inleft > 0) {
43 Py_UNICODE c = IN1;
44 DBCHAR code;
45
46 if (c < 0x80) {
47 WRITE1(c)
48 NEXT(1, 1)
49 continue;
50 }
51 UCS4INVALID(c)
52
53 RESERVE_OUTBUF(2)
54
55 if (c >= 0xac00 && c <= 0xd7a3) {
56 c -= 0xac00;
57 code = 0x8000 |
58 (u2johabidx_choseong[c / 588] << 10) |
59 (u2johabidx_jungseong[(c / 28) % 21] << 5) |
60 u2johabidx_jongseong[c % 28];
61 } else if (c >= 0x3131 && c <= 0x3163)
62 code = u2johabjamo[c - 0x3131];
63 else TRYMAP_ENC(cp949, code, c) {
64 unsigned char c1, c2, t2;
65 unsigned short t1;
66
67 assert((code & 0x8000) == 0);
68 c1 = code >> 8;
69 c2 = code & 0xff;
70 if (((c1 >= 0x21 && c1 <= 0x2c) || (c1 >= 0x4a && c1 <= 0x7d))
71 && (c2 >= 0x21 && c2 <= 0x7e)) {
72 t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) : (c1 - 0x21 + 0x197));
73 t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
74 OUT1(t1 >> 1)
75 OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
76 NEXT(1, 2)
77 continue;
78 } else
79 return 1;
80 } else
81 return 1;
82
83 OUT1(code >> 8)
84 OUT2(code & 0xff)
85 NEXT(1, 2)
86 }
87
88 return 0;
89}
90
/* Markers stored in the decoder tables below.  The decoder treats FILL as
 * "this component is absent" and rejects any field that maps to NONE. */
#define FILL 0xfd
#define NONE 0xff

/*
 * Decoder table: 5-bit leading field -> leading index used in the
 * syllable arithmetic (multiplied by 588 in the decoder).
 */
static const unsigned char johabidx_choseong[32] = {
    /* 0x00 */ NONE, FILL, 0x00, 0x01,
    /* 0x04 */ 0x02, 0x03, 0x04, 0x05,
    /* 0x08 */ 0x06, 0x07, 0x08, 0x09,
    /* 0x0c */ 0x0a, 0x0b, 0x0c, 0x0d,
    /* 0x10 */ 0x0e, 0x0f, 0x10, 0x11,
    /* 0x14 */ 0x12, NONE, NONE, NONE,
    /* 0x18 */ NONE, NONE, NONE, NONE,
    /* 0x1c */ NONE, NONE, NONE, NONE,
};
100static const unsigned char johabidx_jungseong[32] = {
101 NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
102 NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
103 NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
104 NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
105};
106static const unsigned char johabidx_jongseong[32] = {
107 NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
108 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
109 0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
110 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
111};
112
113static const unsigned char johabjamo_choseong[32] = {
114 NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
115 0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
116 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
117 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
118};
119static const unsigned char johabjamo_jungseong[32] = {
120 NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
121 NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
122 NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
123 NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
124};
125static const unsigned char johabjamo_jongseong[32] = {
126 NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
127 0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
128 0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
129 0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
130};
131
132DECODER(johab)
133{
134 while (inleft > 0) {
135 unsigned char c = IN1, c2;
136
137 RESERVE_OUTBUF(1)
138
139 if (c < 0x80) {
140 OUT1(c)
141 NEXT(1, 1)
142 continue;
143 }
144
145 RESERVE_INBUF(2)
146 c2 = IN2;
147
148 if (c < 0xd8) {
149 /* johab hangul */
150 unsigned char c_cho, c_jung, c_jong;
151 unsigned char i_cho, i_jung, i_jong;
152
153 c_cho = (c >> 2) & 0x1f;
154 c_jung = ((c << 3) | c2 >> 5) & 0x1f;
155 c_jong = c2 & 0x1f;
156
157 i_cho = johabidx_choseong[c_cho];
158 i_jung = johabidx_jungseong[c_jung];
159 i_jong = johabidx_jongseong[c_jong];
160
161 if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
162 return 2;
163
164 /* we don't use U+1100 hangul jamo yet. */
165 if (i_cho == FILL) {
166 if (i_jung == FILL) {
167 if (i_jong == FILL)
168 OUT1(0x3000)
169 else
170 OUT1(0x3100 | johabjamo_jongseong[c_jong])
171 } else {
172 if (i_jong == FILL)
173 OUT1(0x3100 | johabjamo_jungseong[c_jung])
174 else
175 return 2;
176 }
177 } else {
178 if (i_jung == FILL) {
179 if (i_jong == FILL)
180 OUT1(0x3100 | johabjamo_choseong[c_cho])
181 else
182 return 2;
183 } else
184 OUT1(0xac00 +
185 i_cho * 588 +
186 i_jung * 28 +
187 (i_jong == FILL ? 0 : i_jong))
188 }
189 NEXT(2, 1)
190 } else {
191 /* KS X 1001 except hangul jamos and syllables */
192 if (c == 0xdf || c > 0xf9 ||
193 c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
194 (c2 & 0x7f) == 0x7f ||
195 (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
196 return 2;
197 else {
198 unsigned char t1, t2;
199
200 t1 = (c < 0xe0 ? 2 * (c - 0xd9) : 2 * c - 0x197);
201 t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
202 t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
203 t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
204
205 TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
206 else return 2;
207 NEXT(2, 1)
208 }
209 }
210 }
211
212 return 0;
213}
214#undef NONE
215#undef FILL
216
217#include "codecentry.h"
/*
 * Codec registry for "johab".  Between MAPOPEN(ko_KR) and MAPCLOSE() it
 * imports the two maps this file uses: the ksx1001 decode map (used by the
 * decoder's TRYMAP_DEC) and the cp949 encode map (used by the encoder's
 * TRYMAP_ENC).
 * NOTE(review): these macros are defined in codecentry.h; presumably ko_KR
 * names the module that provides the maps -- confirm against the header.
 */
BEGIN_CODEC_REGISTRY(johab)
    MAPOPEN(ko_KR)
        IMPORTMAP_DEC(ksx1001)
        IMPORTMAP_ENC(cp949)
    MAPCLOSE()
END_CODEC_REGISTRY(johab)
223END_CODEC_REGISTRY(johab)