/*
 * _codecs_jp.c: Codecs collection for Japanese encodings
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */

7#define USING_BINARY_PAIR_SEARCH
8#define EMPBASE 0x20000
9
10#include "cjkcodecs.h"
11#include "mappings_jp.h"
12#include "mappings_jisx0213_pair.h"
13#include "alg_jisx0201.h"
14#include "emu_jisx0213_2000.h"
15
16/*
17 * CP932 codec
18 */
19
20ENCODER(cp932)
21{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000022 while (inleft > 0) {
Victor Stinnera0dd0212013-04-11 22:09:04 +020023 Py_UCS4 c = IN1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 DBCHAR code;
25 unsigned char c1, c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000027 if (c <= 0x80) {
28 WRITE1((unsigned char)c)
29 NEXT(1, 1)
30 continue;
31 }
32 else if (c >= 0xff61 && c <= 0xff9f) {
33 WRITE1(c - 0xfec0)
34 NEXT(1, 1)
35 continue;
36 }
37 else if (c >= 0xf8f0 && c <= 0xf8f3) {
38 /* Windows compatibility */
39 REQUIRE_OUTBUF(1)
40 if (c == 0xf8f0)
41 OUT1(0xa0)
42 else
43 OUT1(c - 0xfef1 + 0xfd)
44 NEXT(1, 1)
45 continue;
46 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000047
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000048 UCS4INVALID(c)
49 REQUIRE_OUTBUF(2)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000051 TRYMAP_ENC(cp932ext, code, c) {
52 OUT1(code >> 8)
53 OUT2(code & 0xff)
54 }
55 else TRYMAP_ENC(jisxcommon, code, c) {
56 if (code & 0x8000) /* MSB set: JIS X 0212 */
57 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000059 /* JIS X 0208 */
60 c1 = code >> 8;
61 c2 = code & 0xff;
62 c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
63 c1 = (c1 - 0x21) >> 1;
64 OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
65 OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
66 }
67 else if (c >= 0xe000 && c < 0xe758) {
68 /* User-defined area */
Victor Stinnera0dd0212013-04-11 22:09:04 +020069 c1 = (Py_UCS4)(c - 0xe000) / 188;
70 c2 = (Py_UCS4)(c - 0xe000) % 188;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000071 OUT1(c1 + 0xf0)
72 OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
73 }
74 else
75 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 NEXT(1, 2)
78 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000081}
82
83DECODER(cp932)
84{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 while (inleft > 0) {
86 unsigned char c = IN1, c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000088 if (c <= 0x80) {
Victor Stinnera0dd0212013-04-11 22:09:04 +020089 OUTCHAR(c);
90 NEXT_IN(1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 continue;
92 }
93 else if (c >= 0xa0 && c <= 0xdf) {
94 if (c == 0xa0)
Victor Stinnera0dd0212013-04-11 22:09:04 +020095 OUTCHAR(0xf8f0); /* half-width katakana */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 else
Victor Stinnera0dd0212013-04-11 22:09:04 +020097 OUTCHAR(0xfec0 + c);
98 NEXT_IN(1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000099 continue;
100 }
101 else if (c >= 0xfd/* && c <= 0xff*/) {
102 /* Windows compatibility */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200103 OUTCHAR(0xf8f1 - 0xfd + c);
104 NEXT_IN(1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000105 continue;
106 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 REQUIRE_INBUF(2)
109 c2 = IN2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000110
Victor Stinnera0dd0212013-04-11 22:09:04 +0200111 TRYMAP_DEC(cp932ext, writer, c, c2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000112 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
113 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
Victor Stinner2cded9c2011-07-08 01:45:13 +0200114 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000116 c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
117 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
118 c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
119 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000120
Victor Stinnera0dd0212013-04-11 22:09:04 +0200121 TRYMAP_DEC(jisx0208, writer, c, c2);
Victor Stinner2cded9c2011-07-08 01:45:13 +0200122 else return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 }
124 else if (c >= 0xf0 && c <= 0xf9) {
125 if ((c2 >= 0x40 && c2 <= 0x7e) ||
126 (c2 >= 0x80 && c2 <= 0xfc))
Victor Stinnera0dd0212013-04-11 22:09:04 +0200127 OUTCHAR(0xe000 + 188 * (c - 0xf0) +
128 (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200130 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131 }
132 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200133 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000134
Victor Stinnera0dd0212013-04-11 22:09:04 +0200135 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000139}
140
141
142/*
143 * EUC-JIS-2004 codec
144 */
145
146ENCODER(euc_jis_2004)
147{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 while (inleft > 0) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200149 Py_UCS4 c = IN1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 DBCHAR code;
151 Py_ssize_t insize;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000153 if (c < 0x80) {
154 WRITE1(c)
155 NEXT(1, 1)
156 continue;
157 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 DECODE_SURROGATE(c)
160 insize = GET_INSIZE(c);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 if (c <= 0xFFFF) {
163 EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
164 else TRYMAP_ENC(jisx0213_bmp, code, c) {
165 if (code == MULTIC) {
166 if (inleft < 2) {
167 if (flags & MBENC_FLUSH) {
168 code = find_pairencmap(
169 (ucs2_t)c, 0,
170 jisx0213_pair_encmap,
171 JISX0213_ENCPAIRS);
172 if (code == DBCINV)
173 return 1;
174 }
175 else
176 return MBERR_TOOFEW;
177 }
178 else {
179 code = find_pairencmap(
180 (ucs2_t)c, (*inbuf)[1],
181 jisx0213_pair_encmap,
182 JISX0213_ENCPAIRS);
183 if (code == DBCINV) {
184 code = find_pairencmap(
185 (ucs2_t)c, 0,
186 jisx0213_pair_encmap,
187 JISX0213_ENCPAIRS);
188 if (code == DBCINV)
189 return 1;
190 } else
191 insize = 2;
192 }
193 }
194 }
195 else TRYMAP_ENC(jisxcommon, code, c);
196 else if (c >= 0xff61 && c <= 0xff9f) {
197 /* JIS X 0201 half-width katakana */
198 WRITE2(0x8e, c - 0xfec0)
199 NEXT(1, 2)
200 continue;
201 }
202 else if (c == 0xff3c)
203 /* F/W REVERSE SOLIDUS (see NOTES) */
204 code = 0x2140;
205 else if (c == 0xff5e)
206 /* F/W TILDE (see NOTES) */
207 code = 0x2232;
208 else
209 return 1;
210 }
211 else if (c >> 16 == EMPBASE >> 16) {
212 EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
213 else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
214 else return insize;
215 }
216 else
217 return insize;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000218
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000219 if (code & 0x8000) {
220 /* Codeset 2 */
221 WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
222 NEXT(insize, 3)
223 } else {
224 /* Codeset 1 */
225 WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
226 NEXT(insize, 2)
227 }
228 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000231}
232
233DECODER(euc_jis_2004)
234{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 while (inleft > 0) {
236 unsigned char c = IN1;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200237 Py_UCS4 code;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 if (c < 0x80) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200240 OUTCHAR(c);
241 NEXT_IN(1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000242 continue;
243 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000244
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000245 if (c == 0x8e) {
246 /* JIS X 0201 half-width katakana */
247 unsigned char c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000248
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000249 REQUIRE_INBUF(2)
250 c2 = IN2;
251 if (c2 >= 0xa1 && c2 <= 0xdf) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200252 OUTCHAR(0xfec0 + c2);
253 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 }
255 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200256 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 }
258 else if (c == 0x8f) {
259 unsigned char c2, c3;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000260
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 REQUIRE_INBUF(3)
262 c2 = IN2 ^ 0x80;
263 c3 = IN3 ^ 0x80;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000264
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000265 /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200266 EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3)
267 else TRYMAP_DEC(jisx0213_2_bmp, writer, c2, c3) ;
268 else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c2, c3) {
269 OUTCHAR(EMPBASE | code);
270 NEXT_IN(3);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000271 continue;
272 }
Victor Stinnera0dd0212013-04-11 22:09:04 +0200273 else TRYMAP_DEC(jisx0212, writer, c2, c3) ;
Victor Stinner2cded9c2011-07-08 01:45:13 +0200274 else return 1;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200275 NEXT_IN(3);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000276 }
277 else {
278 unsigned char c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000280 REQUIRE_INBUF(2)
281 c ^= 0x80;
282 c2 = IN2 ^ 0x80;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000284 /* JIS X 0213 Plane 1 */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200285 EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2)
286 else if (c == 0x21 && c2 == 0x40) OUTCHAR(0xff3c);
287 else if (c == 0x22 && c2 == 0x32) OUTCHAR(0xff5e);
288 else TRYMAP_DEC(jisx0208, writer, c, c2);
289 else TRYMAP_DEC(jisx0213_1_bmp, writer, c, c2);
290 else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c, c2) {
291 OUTCHAR(EMPBASE | code);
292 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 continue;
294 }
Victor Stinnera0dd0212013-04-11 22:09:04 +0200295 else TRYMAP_DEC_CHAR(jisx0213_pair, code, c, c2) {
296 OUTCHAR2(code >> 16, code & 0xffff);
297 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 continue;
299 }
Victor Stinner2cded9c2011-07-08 01:45:13 +0200300 else return 1;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200301 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 }
303 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000304
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000306}
307
308
309/*
310 * EUC-JP codec
311 */
312
313ENCODER(euc_jp)
314{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000315 while (inleft > 0) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200316 Py_UCS4 c = IN1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000317 DBCHAR code;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000318
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 if (c < 0x80) {
320 WRITE1((unsigned char)c)
321 NEXT(1, 1)
322 continue;
323 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000324
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000325 UCS4INVALID(c)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000327 TRYMAP_ENC(jisxcommon, code, c);
328 else if (c >= 0xff61 && c <= 0xff9f) {
329 /* JIS X 0201 half-width katakana */
330 WRITE2(0x8e, c - 0xfec0)
331 NEXT(1, 2)
332 continue;
333 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000334#ifndef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
336 code = 0x2140;
337 else if (c == 0xa5) { /* YEN SIGN */
338 WRITE1(0x5c);
339 NEXT(1, 1)
340 continue;
341 } else if (c == 0x203e) { /* OVERLINE */
342 WRITE1(0x7e);
343 NEXT(1, 1)
344 continue;
345 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000346#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000347 else
348 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000349
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 if (code & 0x8000) {
351 /* JIS X 0212 */
352 WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
353 NEXT(1, 3)
354 } else {
355 /* JIS X 0208 */
356 WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
357 NEXT(1, 2)
358 }
359 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000362}
363
364DECODER(euc_jp)
365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 while (inleft > 0) {
367 unsigned char c = IN1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000368
Victor Stinner5dfe3bb2011-06-03 23:34:09 +0200369 if (c < 0x80) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200370 OUTCHAR(c);
371 NEXT_IN(1);
Victor Stinner5dfe3bb2011-06-03 23:34:09 +0200372 continue;
373 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000374
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000375 if (c == 0x8e) {
376 /* JIS X 0201 half-width katakana */
377 unsigned char c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000378
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 REQUIRE_INBUF(2)
380 c2 = IN2;
381 if (c2 >= 0xa1 && c2 <= 0xdf) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200382 OUTCHAR(0xfec0 + c2);
383 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000384 }
385 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200386 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 }
388 else if (c == 0x8f) {
389 unsigned char c2, c3;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000390
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000391 REQUIRE_INBUF(3)
392 c2 = IN2;
393 c3 = IN3;
394 /* JIS X 0212 */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200395 TRYMAP_DEC(jisx0212, writer, c2 ^ 0x80, c3 ^ 0x80) {
396 NEXT_IN(3);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 }
398 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200399 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 }
401 else {
402 unsigned char c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000404 REQUIRE_INBUF(2)
405 c2 = IN2;
406 /* JIS X 0208 */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000407#ifndef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 if (c == 0xa1 && c2 == 0xc0)
409 /* FULL-WIDTH REVERSE SOLIDUS */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200410 OUTCHAR(0xff3c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 else
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000412#endif
Victor Stinnera0dd0212013-04-11 22:09:04 +0200413 TRYMAP_DEC(jisx0208, writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 c ^ 0x80, c2 ^ 0x80) ;
Victor Stinner2cded9c2011-07-08 01:45:13 +0200415 else return 1;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200416 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 }
418 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000420 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000421}
422
423
424/*
425 * SHIFT_JIS codec
426 */
427
428ENCODER(shift_jis)
429{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000430 while (inleft > 0) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200431 Py_UCS4 c = IN1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000432 DBCHAR code;
433 unsigned char c1, c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000434
435#ifdef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 JISX0201_R_ENCODE(c, code)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000437#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000438 if (c < 0x80) code = c;
439 else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
440 else if (c == 0x203e) code = 0x7e; /* OVERLINE */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000441#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000442 else JISX0201_K_ENCODE(c, code)
443 else UCS4INVALID(c)
444 else code = NOCHAR;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000445
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
447 REQUIRE_OUTBUF(1)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 OUT1((unsigned char)code)
450 NEXT(1, 1)
451 continue;
452 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000453
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 REQUIRE_OUTBUF(2)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 if (code == NOCHAR) {
457 TRYMAP_ENC(jisxcommon, code, c);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000458#ifndef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 else if (c == 0xff3c)
460 code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000461#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000462 else
463 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000464
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 if (code & 0x8000) /* MSB set: JIS X 0212 */
466 return 1;
467 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000468
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000469 c1 = code >> 8;
470 c2 = code & 0xff;
471 c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
472 c1 = (c1 - 0x21) >> 1;
473 OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
474 OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
475 NEXT(1, 2)
476 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000478 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000479}
480
481DECODER(shift_jis)
482{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000483 while (inleft > 0) {
484 unsigned char c = IN1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000485
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000486#ifdef STRICT_BUILD
Victor Stinnera0dd0212013-04-11 22:09:04 +0200487 JISX0201_R_DECODE(c, writer)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000488#else
Victor Stinnera0dd0212013-04-11 22:09:04 +0200489 if (c < 0x80) OUTCHAR(c);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000490#endif
Victor Stinnera0dd0212013-04-11 22:09:04 +0200491 else JISX0201_K_DECODE(c, writer)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000492 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
493 unsigned char c1, c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000494
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000495 REQUIRE_INBUF(2)
496 c2 = IN2;
497 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
Victor Stinner2cded9c2011-07-08 01:45:13 +0200498 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000499
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
501 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
502 c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
503 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000504
505#ifndef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000506 if (c1 == 0x21 && c2 == 0x40) {
507 /* FULL-WIDTH REVERSE SOLIDUS */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200508 OUTCHAR(0xff3c);
509 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 continue;
511 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000512#endif
Victor Stinnera0dd0212013-04-11 22:09:04 +0200513 TRYMAP_DEC(jisx0208, writer, c1, c2) {
514 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 continue;
516 }
517 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200518 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 }
520 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200521 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000522
Victor Stinnera0dd0212013-04-11 22:09:04 +0200523 NEXT_IN(1); /* JIS X 0201 */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000526 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000527}
528
529
530/*
531 * SHIFT_JIS-2004 codec
532 */
533
534ENCODER(shift_jis_2004)
535{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000536 while (inleft > 0) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200537 Py_UCS4 c = IN1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 DBCHAR code = NOCHAR;
539 int c1, c2;
540 Py_ssize_t insize;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 JISX0201_ENCODE(c, code)
543 else DECODE_SURROGATE(c)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000544
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545 if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
546 WRITE1((unsigned char)code)
547 NEXT(1, 1)
548 continue;
549 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000550
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 REQUIRE_OUTBUF(2)
552 insize = GET_INSIZE(c);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000554 if (code == NOCHAR) {
555 if (c <= 0xffff) {
556 EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
557 else TRYMAP_ENC(jisx0213_bmp, code, c) {
558 if (code == MULTIC) {
559 if (inleft < 2) {
560 if (flags & MBENC_FLUSH) {
561 code = find_pairencmap
562 ((ucs2_t)c, 0,
563 jisx0213_pair_encmap,
564 JISX0213_ENCPAIRS);
565 if (code == DBCINV)
566 return 1;
567 }
568 else
569 return MBERR_TOOFEW;
570 }
571 else {
572 code = find_pairencmap(
573 (ucs2_t)c, IN2,
574 jisx0213_pair_encmap,
575 JISX0213_ENCPAIRS);
576 if (code == DBCINV) {
577 code = find_pairencmap(
578 (ucs2_t)c, 0,
579 jisx0213_pair_encmap,
580 JISX0213_ENCPAIRS);
581 if (code == DBCINV)
582 return 1;
583 }
584 else
585 insize = 2;
586 }
587 }
588 }
589 else TRYMAP_ENC(jisxcommon, code, c) {
590 /* abandon JIS X 0212 codes */
591 if (code & 0x8000)
592 return 1;
593 }
594 else return 1;
595 }
596 else if (c >> 16 == EMPBASE >> 16) {
597 EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
598 else TRYMAP_ENC(jisx0213_emp, code, c&0xffff);
599 else return insize;
600 }
601 else
602 return insize;
603 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 c1 = code >> 8;
606 c2 = (code & 0xff) - 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000608 if (c1 & 0x80) { /* Plane 2 */
609 if (c1 >= 0xee) c1 -= 0x87;
610 else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
611 else c1 -= 0x43;
612 }
613 else /* Plane 1 */
614 c1 -= 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000615
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000616 if (c1 & 1) c2 += 0x5e;
617 c1 >>= 1;
618 OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
619 OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000620
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000621 NEXT(insize, 2)
622 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000623
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000624 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000625}
626
627DECODER(shift_jis_2004)
628{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000629 while (inleft > 0) {
630 unsigned char c = IN1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000631
Victor Stinnera0dd0212013-04-11 22:09:04 +0200632 JISX0201_DECODE(c, writer)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000633 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
634 unsigned char c1, c2;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200635 Py_UCS4 code;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000637 REQUIRE_INBUF(2)
638 c2 = IN2;
639 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
Victor Stinner2cded9c2011-07-08 01:45:13 +0200640 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
643 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
644 c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
645 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647 if (c1 < 0x5e) { /* Plane 1 */
648 c1 += 0x21;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200649 EMULATE_JISX0213_2000_DECODE_PLANE1(writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000650 c1, c2)
Victor Stinnera0dd0212013-04-11 22:09:04 +0200651 else TRYMAP_DEC(jisx0208, writer, c1, c2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 }
Victor Stinnera0dd0212013-04-11 22:09:04 +0200653 else TRYMAP_DEC(jisx0213_1_bmp, writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000654 c1, c2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000655 }
Victor Stinnera0dd0212013-04-11 22:09:04 +0200656 else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c1, c2) {
657 OUTCHAR(EMPBASE | code);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000658 }
Victor Stinnera0dd0212013-04-11 22:09:04 +0200659 else TRYMAP_DEC_CHAR(jisx0213_pair, code, c1, c2) {
660 OUTCHAR2(code >> 16, code & 0xffff);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000661 }
662 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200663 return 1;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200664 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000665 }
666 else { /* Plane 2 */
667 if (c1 >= 0x67) c1 += 0x07;
668 else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
669 else c1 -= 0x3d;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000670
Victor Stinnera0dd0212013-04-11 22:09:04 +0200671 EMULATE_JISX0213_2000_DECODE_PLANE2(writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000672 c1, c2)
Victor Stinnera0dd0212013-04-11 22:09:04 +0200673 else TRYMAP_DEC(jisx0213_2_bmp, writer,
674 c1, c2) {
675 } else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c1, c2) {
676 OUTCHAR(EMPBASE | code);
677 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 continue;
679 }
680 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200681 return 1;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200682 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000683 }
684 continue;
685 }
686 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200687 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000688
Victor Stinnera0dd0212013-04-11 22:09:04 +0200689 NEXT_IN(1); /* JIS X 0201 */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000691
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000692 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000693}
694
695
696BEGIN_MAPPINGS_LIST
697 MAPPING_DECONLY(jisx0208)
698 MAPPING_DECONLY(jisx0212)
699 MAPPING_ENCONLY(jisxcommon)
700 MAPPING_DECONLY(jisx0213_1_bmp)
701 MAPPING_DECONLY(jisx0213_2_bmp)
702 MAPPING_ENCONLY(jisx0213_bmp)
703 MAPPING_DECONLY(jisx0213_1_emp)
704 MAPPING_DECONLY(jisx0213_2_emp)
705 MAPPING_ENCONLY(jisx0213_emp)
706 MAPPING_ENCDEC(jisx0213_pair)
707 MAPPING_ENCDEC(cp932ext)
708END_MAPPINGS_LIST
709
710BEGIN_CODECS_LIST
711 CODEC_STATELESS(shift_jis)
712 CODEC_STATELESS(cp932)
713 CODEC_STATELESS(euc_jp)
714 CODEC_STATELESS(shift_jis_2004)
715 CODEC_STATELESS(euc_jis_2004)
716 { "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
717 { "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
718END_CODECS_LIST
719
720I_AM_A_MODULE_FOR(jp)