blob: 7ab318bcc11310d3a6bab48d268fd8f0b0f4cbcc [file] [log] [blame]
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001/*
2 * _codecs_jp.c: Codecs collection for Japanese encodings
3 *
4 * Written by Hye-Shik Chang <perky@FreeBSD.org>
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00005 */
6
7#define USING_BINARY_PAIR_SEARCH
8#define EMPBASE 0x20000
9
10#include "cjkcodecs.h"
11#include "mappings_jp.h"
12#include "mappings_jisx0213_pair.h"
13#include "alg_jisx0201.h"
14#include "emu_jisx0213_2000.h"
15
16/*
17 * CP932 codec
18 */
19
20ENCODER(cp932)
21{
Victor Stinnerd9491262013-04-14 02:06:32 +020022 while (*inpos < inlen) {
23 Py_UCS4 c = INCHAR1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 DBCHAR code;
25 unsigned char c1, c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000027 if (c <= 0x80) {
Victor Stinnerd9491262013-04-14 02:06:32 +020028 WRITEBYTE1((unsigned char)c)
29 NEXT(1, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000030 continue;
31 }
32 else if (c >= 0xff61 && c <= 0xff9f) {
Victor Stinnerd9491262013-04-14 02:06:32 +020033 WRITEBYTE1(c - 0xfec0)
34 NEXT(1, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000035 continue;
36 }
37 else if (c >= 0xf8f0 && c <= 0xf8f3) {
38 /* Windows compatibility */
39 REQUIRE_OUTBUF(1)
40 if (c == 0xf8f0)
Victor Stinnerd9491262013-04-14 02:06:32 +020041 OUTBYTE1(0xa0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000042 else
Victor Stinnerd9491262013-04-14 02:06:32 +020043 OUTBYTE1(c - 0xfef1 + 0xfd)
44 NEXT(1, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000045 continue;
46 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000047
Victor Stinnerd9491262013-04-14 02:06:32 +020048 if (c > 0xFFFF)
49 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000050 REQUIRE_OUTBUF(2)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000052 TRYMAP_ENC(cp932ext, code, c) {
Victor Stinnerd9491262013-04-14 02:06:32 +020053 OUTBYTE1(code >> 8)
54 OUTBYTE2(code & 0xff)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000055 }
56 else TRYMAP_ENC(jisxcommon, code, c) {
57 if (code & 0x8000) /* MSB set: JIS X 0212 */
58 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000060 /* JIS X 0208 */
61 c1 = code >> 8;
62 c2 = code & 0xff;
63 c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
64 c1 = (c1 - 0x21) >> 1;
Victor Stinnerd9491262013-04-14 02:06:32 +020065 OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
66 OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 }
68 else if (c >= 0xe000 && c < 0xe758) {
69 /* User-defined area */
Victor Stinnera0dd0212013-04-11 22:09:04 +020070 c1 = (Py_UCS4)(c - 0xe000) / 188;
71 c2 = (Py_UCS4)(c - 0xe000) % 188;
Victor Stinnerd9491262013-04-14 02:06:32 +020072 OUTBYTE1(c1 + 0xf0)
73 OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 }
75 else
76 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000077
Victor Stinnerd9491262013-04-14 02:06:32 +020078 NEXT(1, 2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000082}
83
84DECODER(cp932)
85{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 while (inleft > 0) {
Victor Stinnerd9491262013-04-14 02:06:32 +020087 unsigned char c = INBYTE1, c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 if (c <= 0x80) {
Victor Stinnera0dd0212013-04-11 22:09:04 +020090 OUTCHAR(c);
91 NEXT_IN(1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000092 continue;
93 }
94 else if (c >= 0xa0 && c <= 0xdf) {
95 if (c == 0xa0)
Victor Stinnera0dd0212013-04-11 22:09:04 +020096 OUTCHAR(0xf8f0); /* half-width katakana */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 else
Victor Stinnera0dd0212013-04-11 22:09:04 +020098 OUTCHAR(0xfec0 + c);
99 NEXT_IN(1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 continue;
101 }
102 else if (c >= 0xfd/* && c <= 0xff*/) {
103 /* Windows compatibility */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200104 OUTCHAR(0xf8f1 - 0xfd + c);
105 NEXT_IN(1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 continue;
107 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000109 REQUIRE_INBUF(2)
Victor Stinnerd9491262013-04-14 02:06:32 +0200110 c2 = INBYTE2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000111
Victor Stinnera0dd0212013-04-11 22:09:04 +0200112 TRYMAP_DEC(cp932ext, writer, c, c2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
114 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
Victor Stinner2cded9c2011-07-08 01:45:13 +0200115 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000117 c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
118 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
119 c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
120 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000121
Victor Stinnera0dd0212013-04-11 22:09:04 +0200122 TRYMAP_DEC(jisx0208, writer, c, c2);
Victor Stinner2cded9c2011-07-08 01:45:13 +0200123 else return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 }
125 else if (c >= 0xf0 && c <= 0xf9) {
126 if ((c2 >= 0x40 && c2 <= 0x7e) ||
127 (c2 >= 0x80 && c2 <= 0xfc))
Victor Stinnera0dd0212013-04-11 22:09:04 +0200128 OUTCHAR(0xe000 + 188 * (c - 0xf0) +
129 (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000130 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200131 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 }
133 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200134 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000135
Victor Stinnera0dd0212013-04-11 22:09:04 +0200136 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000137 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000140}
141
142
143/*
144 * EUC-JIS-2004 codec
145 */
146
147ENCODER(euc_jis_2004)
148{
Victor Stinnerd9491262013-04-14 02:06:32 +0200149 while (*inpos < inlen) {
150 Py_UCS4 c = INCHAR1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000151 DBCHAR code;
152 Py_ssize_t insize;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 if (c < 0x80) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200155 WRITEBYTE1(c)
156 NEXT(1, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000157 continue;
158 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000159
Victor Stinnerd9491262013-04-14 02:06:32 +0200160 insize = 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 if (c <= 0xFFFF) {
163 EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
164 else TRYMAP_ENC(jisx0213_bmp, code, c) {
165 if (code == MULTIC) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200166 if (inlen - *inpos < 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 if (flags & MBENC_FLUSH) {
168 code = find_pairencmap(
169 (ucs2_t)c, 0,
170 jisx0213_pair_encmap,
171 JISX0213_ENCPAIRS);
172 if (code == DBCINV)
173 return 1;
174 }
175 else
176 return MBERR_TOOFEW;
177 }
178 else {
Victor Stinnerd9491262013-04-14 02:06:32 +0200179 Py_UCS4 c2 = INCHAR2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 code = find_pairencmap(
Victor Stinnerd9491262013-04-14 02:06:32 +0200181 (ucs2_t)c, c2,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 jisx0213_pair_encmap,
183 JISX0213_ENCPAIRS);
184 if (code == DBCINV) {
185 code = find_pairencmap(
186 (ucs2_t)c, 0,
187 jisx0213_pair_encmap,
188 JISX0213_ENCPAIRS);
189 if (code == DBCINV)
190 return 1;
191 } else
192 insize = 2;
193 }
194 }
195 }
196 else TRYMAP_ENC(jisxcommon, code, c);
197 else if (c >= 0xff61 && c <= 0xff9f) {
198 /* JIS X 0201 half-width katakana */
Victor Stinnerd9491262013-04-14 02:06:32 +0200199 WRITEBYTE2(0x8e, c - 0xfec0)
200 NEXT(1, 2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 continue;
202 }
203 else if (c == 0xff3c)
204 /* F/W REVERSE SOLIDUS (see NOTES) */
205 code = 0x2140;
206 else if (c == 0xff5e)
207 /* F/W TILDE (see NOTES) */
208 code = 0x2232;
209 else
210 return 1;
211 }
212 else if (c >> 16 == EMPBASE >> 16) {
213 EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
214 else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
215 else return insize;
216 }
217 else
218 return insize;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000219
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 if (code & 0x8000) {
221 /* Codeset 2 */
Victor Stinnerd9491262013-04-14 02:06:32 +0200222 WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
223 NEXT(insize, 3);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 } else {
225 /* Codeset 1 */
Victor Stinnerd9491262013-04-14 02:06:32 +0200226 WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
227 NEXT(insize, 2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 }
229 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000230
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000231 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000232}
233
234DECODER(euc_jis_2004)
235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 while (inleft > 0) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200237 unsigned char c = INBYTE1;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200238 Py_UCS4 code;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000239
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000240 if (c < 0x80) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200241 OUTCHAR(c);
242 NEXT_IN(1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000243 continue;
244 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000245
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 if (c == 0x8e) {
247 /* JIS X 0201 half-width katakana */
248 unsigned char c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 REQUIRE_INBUF(2)
Victor Stinnerd9491262013-04-14 02:06:32 +0200251 c2 = INBYTE2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 if (c2 >= 0xa1 && c2 <= 0xdf) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200253 OUTCHAR(0xfec0 + c2);
254 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 }
256 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200257 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 }
259 else if (c == 0x8f) {
260 unsigned char c2, c3;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000261
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000262 REQUIRE_INBUF(3)
Victor Stinnerd9491262013-04-14 02:06:32 +0200263 c2 = INBYTE2 ^ 0x80;
264 c3 = INBYTE3 ^ 0x80;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000265
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200267 EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3)
268 else TRYMAP_DEC(jisx0213_2_bmp, writer, c2, c3) ;
269 else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c2, c3) {
270 OUTCHAR(EMPBASE | code);
271 NEXT_IN(3);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 continue;
273 }
Victor Stinnera0dd0212013-04-11 22:09:04 +0200274 else TRYMAP_DEC(jisx0212, writer, c2, c3) ;
Victor Stinner2cded9c2011-07-08 01:45:13 +0200275 else return 1;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200276 NEXT_IN(3);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 }
278 else {
279 unsigned char c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 REQUIRE_INBUF(2)
282 c ^= 0x80;
Victor Stinnerd9491262013-04-14 02:06:32 +0200283 c2 = INBYTE2 ^ 0x80;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 /* JIS X 0213 Plane 1 */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200286 EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2)
287 else if (c == 0x21 && c2 == 0x40) OUTCHAR(0xff3c);
288 else if (c == 0x22 && c2 == 0x32) OUTCHAR(0xff5e);
289 else TRYMAP_DEC(jisx0208, writer, c, c2);
290 else TRYMAP_DEC(jisx0213_1_bmp, writer, c, c2);
291 else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c, c2) {
292 OUTCHAR(EMPBASE | code);
293 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 continue;
295 }
Victor Stinnera0dd0212013-04-11 22:09:04 +0200296 else TRYMAP_DEC_CHAR(jisx0213_pair, code, c, c2) {
297 OUTCHAR2(code >> 16, code & 0xffff);
298 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299 continue;
300 }
Victor Stinner2cded9c2011-07-08 01:45:13 +0200301 else return 1;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200302 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 }
304 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000305
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000306 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000307}
308
309
310/*
311 * EUC-JP codec
312 */
313
314ENCODER(euc_jp)
315{
Victor Stinnerd9491262013-04-14 02:06:32 +0200316 while (*inpos < inlen) {
317 Py_UCS4 c = INCHAR1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 DBCHAR code;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000319
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 if (c < 0x80) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200321 WRITEBYTE1((unsigned char)c)
322 NEXT(1, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000323 continue;
324 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000325
Victor Stinnerd9491262013-04-14 02:06:32 +0200326 if (c > 0xFFFF)
327 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000328
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000329 TRYMAP_ENC(jisxcommon, code, c);
330 else if (c >= 0xff61 && c <= 0xff9f) {
331 /* JIS X 0201 half-width katakana */
Victor Stinnerd9491262013-04-14 02:06:32 +0200332 WRITEBYTE2(0x8e, c - 0xfec0)
333 NEXT(1, 2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 continue;
335 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000336#ifndef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000337 else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
338 code = 0x2140;
339 else if (c == 0xa5) { /* YEN SIGN */
Victor Stinnerd9491262013-04-14 02:06:32 +0200340 WRITEBYTE1(0x5c);
341 NEXT(1, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 continue;
343 } else if (c == 0x203e) { /* OVERLINE */
Victor Stinnerd9491262013-04-14 02:06:32 +0200344 WRITEBYTE1(0x7e);
345 NEXT(1, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 continue;
347 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000348#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000349 else
350 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000351
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 if (code & 0x8000) {
353 /* JIS X 0212 */
Victor Stinnerd9491262013-04-14 02:06:32 +0200354 WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
355 NEXT(1, 3);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000356 } else {
357 /* JIS X 0208 */
Victor Stinnerd9491262013-04-14 02:06:32 +0200358 WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
359 NEXT(1, 2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 }
361 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000362
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000364}
365
366DECODER(euc_jp)
367{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 while (inleft > 0) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200369 unsigned char c = INBYTE1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000370
Victor Stinner5dfe3bb2011-06-03 23:34:09 +0200371 if (c < 0x80) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200372 OUTCHAR(c);
373 NEXT_IN(1);
Victor Stinner5dfe3bb2011-06-03 23:34:09 +0200374 continue;
375 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 if (c == 0x8e) {
378 /* JIS X 0201 half-width katakana */
379 unsigned char c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000380
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 REQUIRE_INBUF(2)
Victor Stinnerd9491262013-04-14 02:06:32 +0200382 c2 = INBYTE2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000383 if (c2 >= 0xa1 && c2 <= 0xdf) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200384 OUTCHAR(0xfec0 + c2);
385 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000386 }
387 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200388 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000389 }
390 else if (c == 0x8f) {
391 unsigned char c2, c3;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 REQUIRE_INBUF(3)
Victor Stinnerd9491262013-04-14 02:06:32 +0200394 c2 = INBYTE2;
395 c3 = INBYTE3;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* JIS X 0212 */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200397 TRYMAP_DEC(jisx0212, writer, c2 ^ 0x80, c3 ^ 0x80) {
398 NEXT_IN(3);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000399 }
400 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200401 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 }
403 else {
404 unsigned char c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000406 REQUIRE_INBUF(2)
Victor Stinnerd9491262013-04-14 02:06:32 +0200407 c2 = INBYTE2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 /* JIS X 0208 */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000409#ifndef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 if (c == 0xa1 && c2 == 0xc0)
411 /* FULL-WIDTH REVERSE SOLIDUS */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200412 OUTCHAR(0xff3c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000413 else
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000414#endif
Victor Stinnera0dd0212013-04-11 22:09:04 +0200415 TRYMAP_DEC(jisx0208, writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000416 c ^ 0x80, c2 ^ 0x80) ;
Victor Stinner2cded9c2011-07-08 01:45:13 +0200417 else return 1;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200418 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 }
420 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000422 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000423}
424
425
426/*
427 * SHIFT_JIS codec
428 */
429
430ENCODER(shift_jis)
431{
Victor Stinnerd9491262013-04-14 02:06:32 +0200432 while (*inpos < inlen) {
433 Py_UCS4 c = INCHAR1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000434 DBCHAR code;
435 unsigned char c1, c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000436
437#ifdef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000438 JISX0201_R_ENCODE(c, code)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000439#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000440 if (c < 0x80) code = c;
441 else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
442 else if (c == 0x203e) code = 0x7e; /* OVERLINE */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000443#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 else JISX0201_K_ENCODE(c, code)
Victor Stinnerd9491262013-04-14 02:06:32 +0200445 else if (c > 0xFFFF)
446 return 1;
447 else
448 code = NOCHAR;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
451 REQUIRE_OUTBUF(1)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000452
Victor Stinnerd9491262013-04-14 02:06:32 +0200453 OUTBYTE1((unsigned char)code)
454 NEXT(1, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000455 continue;
456 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000457
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458 REQUIRE_OUTBUF(2)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460 if (code == NOCHAR) {
461 TRYMAP_ENC(jisxcommon, code, c);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000462#ifndef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 else if (c == 0xff3c)
464 code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000465#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466 else
467 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000468
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000469 if (code & 0x8000) /* MSB set: JIS X 0212 */
470 return 1;
471 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000472
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000473 c1 = code >> 8;
474 c2 = code & 0xff;
475 c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
476 c1 = (c1 - 0x21) >> 1;
Victor Stinnerd9491262013-04-14 02:06:32 +0200477 OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
478 OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
479 NEXT(1, 2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000480 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000481
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000482 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000483}
484
485DECODER(shift_jis)
486{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 while (inleft > 0) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200488 unsigned char c = INBYTE1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000489
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000490#ifdef STRICT_BUILD
Victor Stinnera0dd0212013-04-11 22:09:04 +0200491 JISX0201_R_DECODE(c, writer)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000492#else
Victor Stinnera0dd0212013-04-11 22:09:04 +0200493 if (c < 0x80) OUTCHAR(c);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000494#endif
Victor Stinnera0dd0212013-04-11 22:09:04 +0200495 else JISX0201_K_DECODE(c, writer)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000496 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
497 unsigned char c1, c2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000499 REQUIRE_INBUF(2)
Victor Stinnerd9491262013-04-14 02:06:32 +0200500 c2 = INBYTE2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
Victor Stinner2cded9c2011-07-08 01:45:13 +0200502 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
505 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
506 c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
507 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000508
509#ifndef STRICT_BUILD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 if (c1 == 0x21 && c2 == 0x40) {
511 /* FULL-WIDTH REVERSE SOLIDUS */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200512 OUTCHAR(0xff3c);
513 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000514 continue;
515 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000516#endif
Victor Stinnera0dd0212013-04-11 22:09:04 +0200517 TRYMAP_DEC(jisx0208, writer, c1, c2) {
518 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 continue;
520 }
521 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200522 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000523 }
524 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200525 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000526
Victor Stinnera0dd0212013-04-11 22:09:04 +0200527 NEXT_IN(1); /* JIS X 0201 */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000530 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000531}
532
533
534/*
535 * SHIFT_JIS-2004 codec
536 */
537
538ENCODER(shift_jis_2004)
539{
Victor Stinnerd9491262013-04-14 02:06:32 +0200540 while (*inpos < inlen) {
541 Py_UCS4 c = INCHAR1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 DBCHAR code = NOCHAR;
543 int c1, c2;
544 Py_ssize_t insize;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 JISX0201_ENCODE(c, code)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000548 if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200549 WRITEBYTE1((unsigned char)code)
550 NEXT(1, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 continue;
552 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000554 REQUIRE_OUTBUF(2)
Victor Stinnerd9491262013-04-14 02:06:32 +0200555 insize = 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000557 if (code == NOCHAR) {
558 if (c <= 0xffff) {
559 EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
560 else TRYMAP_ENC(jisx0213_bmp, code, c) {
561 if (code == MULTIC) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200562 if (inlen - *inpos < 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000563 if (flags & MBENC_FLUSH) {
564 code = find_pairencmap
565 ((ucs2_t)c, 0,
566 jisx0213_pair_encmap,
567 JISX0213_ENCPAIRS);
568 if (code == DBCINV)
569 return 1;
570 }
571 else
572 return MBERR_TOOFEW;
573 }
574 else {
Victor Stinnerd9491262013-04-14 02:06:32 +0200575 Py_UCS4 ch2 = INCHAR2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000576 code = find_pairencmap(
Victor Stinnerd9491262013-04-14 02:06:32 +0200577 (ucs2_t)c, ch2,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000578 jisx0213_pair_encmap,
579 JISX0213_ENCPAIRS);
580 if (code == DBCINV) {
581 code = find_pairencmap(
582 (ucs2_t)c, 0,
583 jisx0213_pair_encmap,
584 JISX0213_ENCPAIRS);
585 if (code == DBCINV)
586 return 1;
587 }
588 else
589 insize = 2;
590 }
591 }
592 }
593 else TRYMAP_ENC(jisxcommon, code, c) {
594 /* abandon JIS X 0212 codes */
595 if (code & 0x8000)
596 return 1;
597 }
598 else return 1;
599 }
600 else if (c >> 16 == EMPBASE >> 16) {
601 EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
602 else TRYMAP_ENC(jisx0213_emp, code, c&0xffff);
603 else return insize;
604 }
605 else
606 return insize;
607 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000609 c1 = code >> 8;
610 c2 = (code & 0xff) - 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000611
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000612 if (c1 & 0x80) { /* Plane 2 */
613 if (c1 >= 0xee) c1 -= 0x87;
614 else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
615 else c1 -= 0x43;
616 }
617 else /* Plane 1 */
618 c1 -= 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000620 if (c1 & 1) c2 += 0x5e;
621 c1 >>= 1;
Victor Stinnerd9491262013-04-14 02:06:32 +0200622 OUTBYTE1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
623 OUTBYTE2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000624
Victor Stinnerd9491262013-04-14 02:06:32 +0200625 NEXT(insize, 2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000626 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000628 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000629}
630
631DECODER(shift_jis_2004)
632{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000633 while (inleft > 0) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200634 unsigned char c = INBYTE1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000635
Victor Stinnera0dd0212013-04-11 22:09:04 +0200636 JISX0201_DECODE(c, writer)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000637 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
638 unsigned char c1, c2;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200639 Py_UCS4 code;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000641 REQUIRE_INBUF(2)
Victor Stinnerd9491262013-04-14 02:06:32 +0200642 c2 = INBYTE2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000643 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
Victor Stinner2cded9c2011-07-08 01:45:13 +0200644 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
647 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
648 c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
649 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 if (c1 < 0x5e) { /* Plane 1 */
652 c1 += 0x21;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200653 EMULATE_JISX0213_2000_DECODE_PLANE1(writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000654 c1, c2)
Victor Stinnera0dd0212013-04-11 22:09:04 +0200655 else TRYMAP_DEC(jisx0208, writer, c1, c2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 }
Victor Stinnera0dd0212013-04-11 22:09:04 +0200657 else TRYMAP_DEC(jisx0213_1_bmp, writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000658 c1, c2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000659 }
Victor Stinnera0dd0212013-04-11 22:09:04 +0200660 else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c1, c2) {
661 OUTCHAR(EMPBASE | code);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000662 }
Victor Stinnera0dd0212013-04-11 22:09:04 +0200663 else TRYMAP_DEC_CHAR(jisx0213_pair, code, c1, c2) {
664 OUTCHAR2(code >> 16, code & 0xffff);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000665 }
666 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200667 return 1;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200668 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000669 }
670 else { /* Plane 2 */
671 if (c1 >= 0x67) c1 += 0x07;
672 else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
673 else c1 -= 0x3d;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000674
Victor Stinnera0dd0212013-04-11 22:09:04 +0200675 EMULATE_JISX0213_2000_DECODE_PLANE2(writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000676 c1, c2)
Victor Stinnera0dd0212013-04-11 22:09:04 +0200677 else TRYMAP_DEC(jisx0213_2_bmp, writer,
678 c1, c2) {
679 } else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c1, c2) {
680 OUTCHAR(EMPBASE | code);
681 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000682 continue;
683 }
684 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200685 return 1;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200686 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000687 }
688 continue;
689 }
690 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200691 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000692
Victor Stinnera0dd0212013-04-11 22:09:04 +0200693 NEXT_IN(1); /* JIS X 0201 */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000694 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000696 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000697}
698
699
/*
 * Mapping tables listed for this module.  Judging by the macro names,
 * each entry is registered as decode-only, encode-only, or both
 * (TODO confirm against the MAPPING_* definitions in cjkcodecs.h).
 * The table names match those passed to TRYMAP_DEC / TRYMAP_ENC /
 * find_pairencmap in the codec functions of this file.
 */
700BEGIN_MAPPINGS_LIST
701 MAPPING_DECONLY(jisx0208)
702 MAPPING_DECONLY(jisx0212)
703 MAPPING_ENCONLY(jisxcommon)
704 MAPPING_DECONLY(jisx0213_1_bmp)
705 MAPPING_DECONLY(jisx0213_2_bmp)
706 MAPPING_ENCONLY(jisx0213_bmp)
707 MAPPING_DECONLY(jisx0213_1_emp)
708 MAPPING_DECONLY(jisx0213_2_emp)
709 MAPPING_ENCONLY(jisx0213_emp)
710 MAPPING_ENCDEC(jisx0213_pair)
711 MAPPING_ENCDEC(cp932ext)
712END_MAPPINGS_LIST
713
/*
 * Codecs listed for this module.  The five CODEC_STATELESS entries name
 * the ENCODER/DECODER pairs defined in this file.  The two explicit
 * entries, "euc_jisx0213" and "shift_jisx0213", reuse the euc_jis_2004
 * and shift_jis_2004 methods with the value (void *)2000 in the second
 * field.  NOTE(review): that value presumably becomes the `config` the
 * EMULATE_JISX0213_2000_* macros test for -- confirm in
 * emu_jisx0213_2000.h.
 */
714BEGIN_CODECS_LIST
715 CODEC_STATELESS(shift_jis)
716 CODEC_STATELESS(cp932)
717 CODEC_STATELESS(euc_jp)
718 CODEC_STATELESS(shift_jis_2004)
719 CODEC_STATELESS(euc_jis_2004)
720 { "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
721 { "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
722END_CODECS_LIST
723
/* Module boilerplate for the "jp" codec collection; the macro is defined
 * outside this file (presumably in cjkcodecs.h -- confirm there). */
724I_AM_A_MODULE_FOR(jp)