blob: a500696e9312f201437c42ed03a0ba0d1c2c5efd [file] [log] [blame]
/*
 * _codecs_jp.c: Codecs collection for Japanese encodings
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
6
7#define USING_BINARY_PAIR_SEARCH
8#define EMPBASE 0x20000
9
10#include "cjkcodecs.h"
11#include "mappings_jp.h"
12#include "mappings_jisx0213_pair.h"
13#include "alg_jisx0201.h"
14#include "emu_jisx0213_2000.h"
15
/*
 * CP932 codec
 */
19
20ENCODER(cp932)
21{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000022 while (inleft > 0) {
23 Py_UNICODE c = IN1;
24 DBCHAR code;
25 unsigned char c1, c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000027 if (c <= 0x80) {
28 WRITE1((unsigned char)c)
29 NEXT(1, 1)
30 continue;
31 }
32 else if (c >= 0xff61 && c <= 0xff9f) {
33 WRITE1(c - 0xfec0)
34 NEXT(1, 1)
35 continue;
36 }
37 else if (c >= 0xf8f0 && c <= 0xf8f3) {
38 /* Windows compatibility */
39 REQUIRE_OUTBUF(1)
40 if (c == 0xf8f0)
41 OUT1(0xa0)
42 else
43 OUT1(c - 0xfef1 + 0xfd)
44 NEXT(1, 1)
45 continue;
46 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000047
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000048 UCS4INVALID(c)
49 REQUIRE_OUTBUF(2)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000051 TRYMAP_ENC(cp932ext, code, c) {
52 OUT1(code >> 8)
53 OUT2(code & 0xff)
54 }
55 else TRYMAP_ENC(jisxcommon, code, c) {
56 if (code & 0x8000) /* MSB set: JIS X 0212 */
57 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000059 /* JIS X 0208 */
60 c1 = code >> 8;
61 c2 = code & 0xff;
62 c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
63 c1 = (c1 - 0x21) >> 1;
64 OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
65 OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
66 }
67 else if (c >= 0xe000 && c < 0xe758) {
68 /* User-defined area */
69 c1 = (Py_UNICODE)(c - 0xe000) / 188;
70 c2 = (Py_UNICODE)(c - 0xe000) % 188;
71 OUT1(c1 + 0xf0)
72 OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
73 }
74 else
75 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 NEXT(1, 2)
78 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000081}
82
83DECODER(cp932)
84{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 while (inleft > 0) {
86 unsigned char c = IN1, c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000088 REQUIRE_OUTBUF(1)
89 if (c <= 0x80) {
90 OUT1(c)
91 NEXT(1, 1)
92 continue;
93 }
94 else if (c >= 0xa0 && c <= 0xdf) {
95 if (c == 0xa0)
96 OUT1(0xf8f0) /* half-width katakana */
97 else
98 OUT1(0xfec0 + c)
99 NEXT(1, 1)
100 continue;
101 }
102 else if (c >= 0xfd/* && c <= 0xff*/) {
103 /* Windows compatibility */
104 OUT1(0xf8f1 - 0xfd + c)
105 NEXT(1, 1)
106 continue;
107 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000109 REQUIRE_INBUF(2)
110 c2 = IN2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000111
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000112 TRYMAP_DEC(cp932ext, **outbuf, c, c2);
113 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
114 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
Victor Stinner2cded9c2011-07-08 01:45:13 +0200115 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000117 c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
118 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
119 c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
120 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000121
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 TRYMAP_DEC(jisx0208, **outbuf, c, c2);
Victor Stinner2cded9c2011-07-08 01:45:13 +0200123 else return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 }
125 else if (c >= 0xf0 && c <= 0xf9) {
126 if ((c2 >= 0x40 && c2 <= 0x7e) ||
127 (c2 >= 0x80 && c2 <= 0xfc))
128 OUT1(0xe000 + 188 * (c - 0xf0) +
129 (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
130 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200131 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 }
133 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200134 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 NEXT(2, 1)
137 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000140}
141
142
/*
 * EUC-JIS-2004 codec
 */
146
147ENCODER(euc_jis_2004)
148{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 while (inleft > 0) {
150 ucs4_t c = IN1;
151 DBCHAR code;
152 Py_ssize_t insize;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 if (c < 0x80) {
155 WRITE1(c)
156 NEXT(1, 1)
157 continue;
158 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 DECODE_SURROGATE(c)
161 insize = GET_INSIZE(c);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 if (c <= 0xFFFF) {
164 EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
165 else TRYMAP_ENC(jisx0213_bmp, code, c) {
166 if (code == MULTIC) {
167 if (inleft < 2) {
168 if (flags & MBENC_FLUSH) {
169 code = find_pairencmap(
170 (ucs2_t)c, 0,
171 jisx0213_pair_encmap,
172 JISX0213_ENCPAIRS);
173 if (code == DBCINV)
174 return 1;
175 }
176 else
177 return MBERR_TOOFEW;
178 }
179 else {
180 code = find_pairencmap(
181 (ucs2_t)c, (*inbuf)[1],
182 jisx0213_pair_encmap,
183 JISX0213_ENCPAIRS);
184 if (code == DBCINV) {
185 code = find_pairencmap(
186 (ucs2_t)c, 0,
187 jisx0213_pair_encmap,
188 JISX0213_ENCPAIRS);
189 if (code == DBCINV)
190 return 1;
191 } else
192 insize = 2;
193 }
194 }
195 }
196 else TRYMAP_ENC(jisxcommon, code, c);
197 else if (c >= 0xff61 && c <= 0xff9f) {
198 /* JIS X 0201 half-width katakana */
199 WRITE2(0x8e, c - 0xfec0)
200 NEXT(1, 2)
201 continue;
202 }
203 else if (c == 0xff3c)
204 /* F/W REVERSE SOLIDUS (see NOTES) */
205 code = 0x2140;
206 else if (c == 0xff5e)
207 /* F/W TILDE (see NOTES) */
208 code = 0x2232;
209 else
210 return 1;
211 }
212 else if (c >> 16 == EMPBASE >> 16) {
213 EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
214 else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
215 else return insize;
216 }
217 else
218 return insize;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000219
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 if (code & 0x8000) {
221 /* Codeset 2 */
222 WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
223 NEXT(insize, 3)
224 } else {
225 /* Codeset 1 */
226 WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
227 NEXT(insize, 2)
228 }
229 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000230
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000231 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000232}
233
234DECODER(euc_jis_2004)
235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 while (inleft > 0) {
237 unsigned char c = IN1;
238 ucs4_t code;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000239
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000240 REQUIRE_OUTBUF(1)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000241
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000242 if (c < 0x80) {
243 OUT1(c)
244 NEXT(1, 1)
245 continue;
246 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000247
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 if (c == 0x8e) {
249 /* JIS X 0201 half-width katakana */
250 unsigned char c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 REQUIRE_INBUF(2)
253 c2 = IN2;
254 if (c2 >= 0xa1 && c2 <= 0xdf) {
255 OUT1(0xfec0 + c2)
256 NEXT(2, 1)
257 }
258 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200259 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000260 }
261 else if (c == 0x8f) {
262 unsigned char c2, c3;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 REQUIRE_INBUF(3)
265 c2 = IN2 ^ 0x80;
266 c3 = IN3 ^ 0x80;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000267
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
269 EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
270 else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
271 else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
272 WRITEUCS4(EMPBASE | code)
273 NEXT_IN(3)
274 continue;
275 }
276 else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
Victor Stinner2cded9c2011-07-08 01:45:13 +0200277 else return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000278 NEXT(3, 1)
279 }
280 else {
281 unsigned char c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000282
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000283 REQUIRE_INBUF(2)
284 c ^= 0x80;
285 c2 = IN2 ^ 0x80;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000286
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000287 /* JIS X 0213 Plane 1 */
288 EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
289 else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
290 else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
291 else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
292 else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
293 else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
294 WRITEUCS4(EMPBASE | code)
295 NEXT_IN(2)
296 continue;
297 }
298 else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
299 WRITE2(code >> 16, code & 0xffff)
300 NEXT(2, 2)
301 continue;
302 }
Victor Stinner2cded9c2011-07-08 01:45:13 +0200303 else return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000304 NEXT(2, 1)
305 }
306 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000307
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000309}
310
311
/*
 * EUC-JP codec
 */
315
316ENCODER(euc_jp)
317{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 while (inleft > 0) {
319 Py_UNICODE c = IN1;
320 DBCHAR code;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000321
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000322 if (c < 0x80) {
323 WRITE1((unsigned char)c)
324 NEXT(1, 1)
325 continue;
326 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000327
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000328 UCS4INVALID(c)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000329
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000330 TRYMAP_ENC(jisxcommon, code, c);
331 else if (c >= 0xff61 && c <= 0xff9f) {
332 /* JIS X 0201 half-width katakana */
333 WRITE2(0x8e, c - 0xfec0)
334 NEXT(1, 2)
335 continue;
336 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000337#ifndef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000338 else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
339 code = 0x2140;
340 else if (c == 0xa5) { /* YEN SIGN */
341 WRITE1(0x5c);
342 NEXT(1, 1)
343 continue;
344 } else if (c == 0x203e) { /* OVERLINE */
345 WRITE1(0x7e);
346 NEXT(1, 1)
347 continue;
348 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000349#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 else
351 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 if (code & 0x8000) {
354 /* JIS X 0212 */
355 WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
356 NEXT(1, 3)
357 } else {
358 /* JIS X 0208 */
359 WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
360 NEXT(1, 2)
361 }
362 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000363
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000365}
366
367DECODER(euc_jp)
368{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 while (inleft > 0) {
370 unsigned char c = IN1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000371
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 REQUIRE_OUTBUF(1)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000373
Victor Stinner5dfe3bb2011-06-03 23:34:09 +0200374 if (c < 0x80) {
375 OUT1(c)
376 NEXT(1, 1)
377 continue;
378 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 if (c == 0x8e) {
381 /* JIS X 0201 half-width katakana */
382 unsigned char c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000383
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000384 REQUIRE_INBUF(2)
385 c2 = IN2;
386 if (c2 >= 0xa1 && c2 <= 0xdf) {
387 OUT1(0xfec0 + c2)
388 NEXT(2, 1)
389 }
390 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200391 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000392 }
393 else if (c == 0x8f) {
394 unsigned char c2, c3;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 REQUIRE_INBUF(3)
397 c2 = IN2;
398 c3 = IN3;
399 /* JIS X 0212 */
400 TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
401 NEXT(3, 1)
402 }
403 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200404 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 }
406 else {
407 unsigned char c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000408
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000409 REQUIRE_INBUF(2)
410 c2 = IN2;
411 /* JIS X 0208 */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000412#ifndef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000413 if (c == 0xa1 && c2 == 0xc0)
414 /* FULL-WIDTH REVERSE SOLIDUS */
415 **outbuf = 0xff3c;
416 else
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000417#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 TRYMAP_DEC(jisx0208, **outbuf,
419 c ^ 0x80, c2 ^ 0x80) ;
Victor Stinner2cded9c2011-07-08 01:45:13 +0200420 else return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000421 NEXT(2, 1)
422 }
423 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000425 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000426}
427
428
/*
 * SHIFT_JIS codec
 */
432
433ENCODER(shift_jis)
434{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000435 while (inleft > 0) {
436 Py_UNICODE c = IN1;
437 DBCHAR code;
438 unsigned char c1, c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000439
440#ifdef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 JISX0201_R_ENCODE(c, code)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000442#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 if (c < 0x80) code = c;
444 else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
445 else if (c == 0x203e) code = 0x7e; /* OVERLINE */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000446#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000447 else JISX0201_K_ENCODE(c, code)
448 else UCS4INVALID(c)
449 else code = NOCHAR;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000451 if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
452 REQUIRE_OUTBUF(1)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000453
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 OUT1((unsigned char)code)
455 NEXT(1, 1)
456 continue;
457 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000458
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 REQUIRE_OUTBUF(2)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000460
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 if (code == NOCHAR) {
462 TRYMAP_ENC(jisxcommon, code, c);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000463#ifndef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 else if (c == 0xff3c)
465 code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000466#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000467 else
468 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000469
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 if (code & 0x8000) /* MSB set: JIS X 0212 */
471 return 1;
472 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 c1 = code >> 8;
475 c2 = code & 0xff;
476 c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
477 c1 = (c1 - 0x21) >> 1;
478 OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
479 OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
480 NEXT(1, 2)
481 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000482
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000483 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000484}
485
486DECODER(shift_jis)
487{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 while (inleft > 0) {
489 unsigned char c = IN1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000491 REQUIRE_OUTBUF(1)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000492
493#ifdef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 JISX0201_R_DECODE(c, **outbuf)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000495#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000496 if (c < 0x80) **outbuf = c;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000497#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 else JISX0201_K_DECODE(c, **outbuf)
499 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
500 unsigned char c1, c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 REQUIRE_INBUF(2)
503 c2 = IN2;
504 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
Victor Stinner2cded9c2011-07-08 01:45:13 +0200505 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000506
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000507 c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
508 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
509 c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
510 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000511
512#ifndef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000513 if (c1 == 0x21 && c2 == 0x40) {
514 /* FULL-WIDTH REVERSE SOLIDUS */
515 OUT1(0xff3c)
516 NEXT(2, 1)
517 continue;
518 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000519#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000520 TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
521 NEXT(2, 1)
522 continue;
523 }
524 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200525 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000526 }
527 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200528 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000530 NEXT(1, 1) /* JIS X 0201 */
531 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000534}
535
536
/*
 * SHIFT_JIS-2004 codec
 */
540
541ENCODER(shift_jis_2004)
542{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 while (inleft > 0) {
544 ucs4_t c = IN1;
545 DBCHAR code = NOCHAR;
546 int c1, c2;
547 Py_ssize_t insize;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000548
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000549 JISX0201_ENCODE(c, code)
550 else DECODE_SURROGATE(c)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000551
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000552 if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
553 WRITE1((unsigned char)code)
554 NEXT(1, 1)
555 continue;
556 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000557
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000558 REQUIRE_OUTBUF(2)
559 insize = GET_INSIZE(c);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000560
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000561 if (code == NOCHAR) {
562 if (c <= 0xffff) {
563 EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
564 else TRYMAP_ENC(jisx0213_bmp, code, c) {
565 if (code == MULTIC) {
566 if (inleft < 2) {
567 if (flags & MBENC_FLUSH) {
568 code = find_pairencmap
569 ((ucs2_t)c, 0,
570 jisx0213_pair_encmap,
571 JISX0213_ENCPAIRS);
572 if (code == DBCINV)
573 return 1;
574 }
575 else
576 return MBERR_TOOFEW;
577 }
578 else {
579 code = find_pairencmap(
580 (ucs2_t)c, IN2,
581 jisx0213_pair_encmap,
582 JISX0213_ENCPAIRS);
583 if (code == DBCINV) {
584 code = find_pairencmap(
585 (ucs2_t)c, 0,
586 jisx0213_pair_encmap,
587 JISX0213_ENCPAIRS);
588 if (code == DBCINV)
589 return 1;
590 }
591 else
592 insize = 2;
593 }
594 }
595 }
596 else TRYMAP_ENC(jisxcommon, code, c) {
597 /* abandon JIS X 0212 codes */
598 if (code & 0x8000)
599 return 1;
600 }
601 else return 1;
602 }
603 else if (c >> 16 == EMPBASE >> 16) {
604 EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
605 else TRYMAP_ENC(jisx0213_emp, code, c&0xffff);
606 else return insize;
607 }
608 else
609 return insize;
610 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000611
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000612 c1 = code >> 8;
613 c2 = (code & 0xff) - 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000614
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000615 if (c1 & 0x80) { /* Plane 2 */
616 if (c1 >= 0xee) c1 -= 0x87;
617 else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
618 else c1 -= 0x43;
619 }
620 else /* Plane 1 */
621 c1 -= 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000622
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 if (c1 & 1) c2 += 0x5e;
624 c1 >>= 1;
625 OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
626 OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000628 NEXT(insize, 2)
629 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000630
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000631 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000632}
633
634DECODER(shift_jis_2004)
635{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000636 while (inleft > 0) {
637 unsigned char c = IN1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000639 REQUIRE_OUTBUF(1)
640 JISX0201_DECODE(c, **outbuf)
641 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
642 unsigned char c1, c2;
643 ucs4_t code;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000644
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000645 REQUIRE_INBUF(2)
646 c2 = IN2;
647 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
Victor Stinner2cded9c2011-07-08 01:45:13 +0200648 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000649
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000650 c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
651 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
652 c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
653 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000654
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000655 if (c1 < 0x5e) { /* Plane 1 */
656 c1 += 0x21;
657 EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
658 c1, c2)
659 else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
660 NEXT_OUT(1)
661 }
662 else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
663 c1, c2) {
664 NEXT_OUT(1)
665 }
666 else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
667 WRITEUCS4(EMPBASE | code)
668 }
669 else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
670 WRITE2(code >> 16, code & 0xffff)
671 NEXT_OUT(2)
672 }
673 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200674 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000675 NEXT_IN(2)
676 }
677 else { /* Plane 2 */
678 if (c1 >= 0x67) c1 += 0x07;
679 else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
680 else c1 -= 0x3d;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000682 EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
683 c1, c2)
684 else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
685 c1, c2) ;
686 else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
687 WRITEUCS4(EMPBASE | code)
688 NEXT_IN(2)
689 continue;
690 }
691 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200692 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000693 NEXT(2, 1)
694 }
695 continue;
696 }
697 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200698 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000700 NEXT(1, 1) /* JIS X 0201 */
701 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000703 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000704}
705
706
707BEGIN_MAPPINGS_LIST
708 MAPPING_DECONLY(jisx0208)
709 MAPPING_DECONLY(jisx0212)
710 MAPPING_ENCONLY(jisxcommon)
711 MAPPING_DECONLY(jisx0213_1_bmp)
712 MAPPING_DECONLY(jisx0213_2_bmp)
713 MAPPING_ENCONLY(jisx0213_bmp)
714 MAPPING_DECONLY(jisx0213_1_emp)
715 MAPPING_DECONLY(jisx0213_2_emp)
716 MAPPING_ENCONLY(jisx0213_emp)
717 MAPPING_ENCDEC(jisx0213_pair)
718 MAPPING_ENCDEC(cp932ext)
719END_MAPPINGS_LIST
720
721BEGIN_CODECS_LIST
722 CODEC_STATELESS(shift_jis)
723 CODEC_STATELESS(cp932)
724 CODEC_STATELESS(euc_jp)
725 CODEC_STATELESS(shift_jis_2004)
726 CODEC_STATELESS(euc_jis_2004)
727 { "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
728 { "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
729END_CODECS_LIST
730
731I_AM_A_MODULE_FOR(jp)