blob: 1a070f2f393219de4ec69bf4a14ba081c6b436ee [file] [log] [blame]
/*
 * _codecs_cn.c: Codecs collection for Mainland Chinese encodings
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
6
7#include "cjkcodecs.h"
8#include "mappings_cn.h"
9
/*
 * AIX predefines `hz` as 100.  Remove that definition so it cannot
 * conflict with the identifiers of the hz codec defined in this file.
 */
#if defined(_AIX)
#  undef hz
#endif
17
/* GBK and GB2312 map a few code points differently, as listed below:
 *
 *              gb2312                          gbk
 * A1A4         U+30FB KATAKANA MIDDLE DOT      U+00B7 MIDDLE DOT
 * A1AA         U+2015 HORIZONTAL BAR           U+2014 EM DASH
 * A844         undefined                       U+2015 HORIZONTAL BAR
 */
25
/*
 * GBK_DECODE(dc1, dc2, writer)
 *
 * Decode the two-byte GBK sequence (dc1, dc2) and emit the result with
 * OUTCHAR.  The three code points where GBK differs from GB2312 (A1AA,
 * A844, A1A4) are handled explicitly; everything else is looked up in
 * the gb2312 table (with the high bit of each byte cleared) and then in
 * the gbkext table (with the bytes as given).
 *
 * The expansion is an if / else-if chain WITHOUT a final else, so the
 * caller must follow the invocation with its own `else` branch to
 * handle undecodable input.  A variable named `decoded` must be in
 * scope at the call site.  The `writer` argument is not used by the
 * expansion itself.  dc1 and dc2 are evaluated more than once, so they
 * must be free of side effects.
 */
#define GBK_DECODE(dc1, dc2, writer)                                    \
    if ((dc1) == 0xa1 && (dc2) == 0xaa) {                               \
        OUTCHAR(0x2014);                                                \
    }                                                                   \
    else if ((dc1) == 0xa8 && (dc2) == 0x44) {                          \
        OUTCHAR(0x2015);                                                \
    }                                                                   \
    else if ((dc1) == 0xa1 && (dc2) == 0xa4) {                          \
        OUTCHAR(0x00b7);                                                \
    }                                                                   \
    else if (TRYMAP_DEC(gb2312, decoded, (dc1) ^ 0x80, (dc2) ^ 0x80)) { \
        OUTCHAR(decoded);                                               \
    }                                                                   \
    else if (TRYMAP_DEC(gbkext, decoded, (dc1), (dc2))) {               \
        OUTCHAR(decoded);                                               \
    }
Thomas Wouters89f507f2006-12-13 04:49:30 +000042
/*
 * GBK_ENCODE(code, assi)
 *
 * Store in `assi` the double-byte GBK value for the code point `code`.
 * U+2014, U+2015 and U+00B7 get fixed GBK values; U+30FB is never
 * looked up; every other code point goes through the gbcommon table.
 *
 * The expansion is an if / else-if chain without a final else: the
 * caller appends its own `else` branch for code points that could not
 * be encoded.
 */
#define GBK_ENCODE(code, assi)                                          \
    if ((code) == 0x2014) {                                             \
        (assi) = 0xa1aa;                                                \
    }                                                                   \
    else if ((code) == 0x2015) {                                        \
        (assi) = 0xa844;                                                \
    }                                                                   \
    else if ((code) == 0x00b7) {                                        \
        (assi) = 0xa1a4;                                                \
    }                                                                   \
    else if ((code) != 0x30fb && TRYMAP_ENC(gbcommon, assi, code)) {    \
        /* table hit: nothing more to do */                             \
    }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000053
/*
 * GB2312 codec
 */
57
58ENCODER(gb2312)
59{
Victor Stinnerd9491262013-04-14 02:06:32 +020060 while (*inpos < inlen) {
61 Py_UCS4 c = INCHAR1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 DBCHAR code;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000064 if (c < 0x80) {
Victor Stinner14c9fea2013-10-29 00:19:27 +010065 WRITEBYTE1((unsigned char)c);
Victor Stinnerd9491262013-04-14 02:06:32 +020066 NEXT(1, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 continue;
68 }
Victor Stinnerd9491262013-04-14 02:06:32 +020069
70 if (c > 0xFFFF)
71 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000072
Victor Stinner14c9fea2013-10-29 00:19:27 +010073 REQUIRE_OUTBUF(2);
Victor Stinner146a2ed2013-10-29 00:09:41 +010074 if (TRYMAP_ENC(gbcommon, code, c))
75 ;
76 else
77 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 if (code & 0x8000) /* MSB set: GBK */
80 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000081
Victor Stinner14c9fea2013-10-29 00:19:27 +010082 OUTBYTE1((code >> 8) | 0x80);
83 OUTBYTE2((code & 0xFF) | 0x80);
Victor Stinnerd9491262013-04-14 02:06:32 +020084 NEXT(1, 2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000088}
89
90DECODER(gb2312)
91{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000092 while (inleft > 0) {
93 unsigned char c = **inbuf;
Victor Stinner11bdf912013-10-28 23:18:39 +010094 Py_UCS4 decoded;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000095
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 if (c < 0x80) {
Victor Stinnera0dd0212013-04-11 22:09:04 +020097 OUTCHAR(c);
98 NEXT_IN(1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000099 continue;
100 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000101
Victor Stinner28c63f72013-10-29 00:59:44 +0100102 REQUIRE_INBUF(2);
Victor Stinner11bdf912013-10-28 23:18:39 +0100103 if (TRYMAP_DEC(gb2312, decoded, c ^ 0x80, INBYTE2 ^ 0x80)) {
104 OUTCHAR(decoded);
Victor Stinnera0dd0212013-04-11 22:09:04 +0200105 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 }
Victor Stinner146a2ed2013-10-29 00:09:41 +0100107 else
108 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000109 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000112}
113
114
/*
 * GBK codec
 */
118
119ENCODER(gbk)
120{
Victor Stinnerd9491262013-04-14 02:06:32 +0200121 while (*inpos < inlen) {
122 Py_UCS4 c = INCHAR1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 DBCHAR code;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 if (c < 0x80) {
Victor Stinner14c9fea2013-10-29 00:19:27 +0100126 WRITEBYTE1((unsigned char)c);
Victor Stinnerd9491262013-04-14 02:06:32 +0200127 NEXT(1, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000128 continue;
129 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200130
131 if (c > 0xFFFF)
132 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000133
Victor Stinner14c9fea2013-10-29 00:19:27 +0100134 REQUIRE_OUTBUF(2);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 GBK_ENCODE(c, code)
Victor Stinner146a2ed2013-10-29 00:09:41 +0100137 else
138 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000139
Victor Stinner14c9fea2013-10-29 00:19:27 +0100140 OUTBYTE1((code >> 8) | 0x80);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 if (code & 0x8000)
Victor Stinner14c9fea2013-10-29 00:19:27 +0100142 OUTBYTE2((code & 0xFF)); /* MSB set: GBK */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 else
Victor Stinner14c9fea2013-10-29 00:19:27 +0100144 OUTBYTE2((code & 0xFF) | 0x80); /* MSB unset: GB2312 */
Victor Stinnerd9491262013-04-14 02:06:32 +0200145 NEXT(1, 2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000149}
150
151DECODER(gbk)
152{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000153 while (inleft > 0) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200154 unsigned char c = INBYTE1;
Victor Stinner11bdf912013-10-28 23:18:39 +0100155 Py_UCS4 decoded;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000157 if (c < 0x80) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200158 OUTCHAR(c);
159 NEXT_IN(1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 continue;
161 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000162
Victor Stinner28c63f72013-10-29 00:59:44 +0100163 REQUIRE_INBUF(2);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000164
Victor Stinnerd9491262013-04-14 02:06:32 +0200165 GBK_DECODE(c, INBYTE2, writer)
Victor Stinner146a2ed2013-10-29 00:09:41 +0100166 else
167 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000168
Victor Stinnera0dd0212013-04-11 22:09:04 +0200169 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000172 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000173}
174
175
/*
 * GB18030 codec
 */
179
180ENCODER(gb18030)
181{
Victor Stinnerd9491262013-04-14 02:06:32 +0200182 while (*inpos < inlen) {
183 Py_UCS4 c = INCHAR1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 DBCHAR code;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000185
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000186 if (c < 0x80) {
Victor Stinner14c9fea2013-10-29 00:19:27 +0100187 WRITEBYTE1(c);
Victor Stinnerd9491262013-04-14 02:06:32 +0200188 NEXT(1, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 continue;
190 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000191
Victor Stinnerd9491262013-04-14 02:06:32 +0200192 if (c >= 0x10000) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200193 Py_UCS4 tc = c - 0x10000;
Victor Stinnerd9491262013-04-14 02:06:32 +0200194 assert (c <= 0x10FFFF);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000195
Victor Stinner14c9fea2013-10-29 00:19:27 +0100196 REQUIRE_OUTBUF(4);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000197
Victor Stinner14c9fea2013-10-29 00:19:27 +0100198 OUTBYTE4((unsigned char)(tc % 10) + 0x30);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 tc /= 10;
Victor Stinner14c9fea2013-10-29 00:19:27 +0100200 OUTBYTE3((unsigned char)(tc % 126) + 0x81);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 tc /= 126;
Victor Stinner14c9fea2013-10-29 00:19:27 +0100202 OUTBYTE2((unsigned char)(tc % 10) + 0x30);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 tc /= 10;
Victor Stinner14c9fea2013-10-29 00:19:27 +0100204 OUTBYTE1((unsigned char)(tc + 0x90));
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000205
Victor Stinnerd9491262013-04-14 02:06:32 +0200206 NEXT(1, 4);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 continue;
208 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000209
Victor Stinner14c9fea2013-10-29 00:19:27 +0100210 REQUIRE_OUTBUF(2);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000211
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000212 GBK_ENCODE(c, code)
Victor Stinner146a2ed2013-10-29 00:09:41 +0100213 else if (TRYMAP_ENC(gb18030ext, code, c))
214 ;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000215 else {
216 const struct _gb18030_to_unibmp_ranges *utrrange;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000217
Victor Stinner14c9fea2013-10-29 00:19:27 +0100218 REQUIRE_OUTBUF(4);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000219
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 for (utrrange = gb18030_to_unibmp_ranges;
221 utrrange->first != 0;
222 utrrange++)
223 if (utrrange->first <= c &&
224 c <= utrrange->last) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200225 Py_UCS4 tc;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000226
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 tc = c - utrrange->first +
228 utrrange->base;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000229
Victor Stinner14c9fea2013-10-29 00:19:27 +0100230 OUTBYTE4((unsigned char)(tc % 10) + 0x30);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000231 tc /= 10;
Victor Stinner14c9fea2013-10-29 00:19:27 +0100232 OUTBYTE3((unsigned char)(tc % 126) + 0x81);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000233 tc /= 126;
Victor Stinner14c9fea2013-10-29 00:19:27 +0100234 OUTBYTE2((unsigned char)(tc % 10) + 0x30);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 tc /= 10;
Victor Stinner14c9fea2013-10-29 00:19:27 +0100236 OUTBYTE1((unsigned char)tc + 0x81);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000237
Victor Stinnerd9491262013-04-14 02:06:32 +0200238 NEXT(1, 4);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 break;
240 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000241
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000242 if (utrrange->first == 0)
243 return 1;
244 continue;
245 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000246
Victor Stinner14c9fea2013-10-29 00:19:27 +0100247 OUTBYTE1((code >> 8) | 0x80);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 if (code & 0x8000)
Victor Stinner14c9fea2013-10-29 00:19:27 +0100249 OUTBYTE2((code & 0xFF)); /* MSB set: GBK or GB18030ext */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 else
Victor Stinner14c9fea2013-10-29 00:19:27 +0100251 OUTBYTE2((code & 0xFF) | 0x80); /* MSB unset: GB2312 */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000252
Victor Stinnerd9491262013-04-14 02:06:32 +0200253 NEXT(1, 2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000255
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000257}
258
259DECODER(gb18030)
260{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 while (inleft > 0) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200262 unsigned char c = INBYTE1, c2;
Victor Stinner11bdf912013-10-28 23:18:39 +0100263 Py_UCS4 decoded;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000264
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000265 if (c < 0x80) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200266 OUTCHAR(c);
267 NEXT_IN(1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 continue;
269 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000270
Victor Stinner28c63f72013-10-29 00:59:44 +0100271 REQUIRE_INBUF(2);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000272
Victor Stinnerd9491262013-04-14 02:06:32 +0200273 c2 = INBYTE2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000274 if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
275 const struct _gb18030_to_unibmp_ranges *utr;
276 unsigned char c3, c4;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200277 Py_UCS4 lseq;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000278
Victor Stinner28c63f72013-10-29 00:59:44 +0100279 REQUIRE_INBUF(4);
Victor Stinnerd9491262013-04-14 02:06:32 +0200280 c3 = INBYTE3;
281 c4 = INBYTE4;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000282 if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
Victor Stinner2cded9c2011-07-08 01:45:13 +0200283 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000284 c -= 0x81; c2 -= 0x30;
285 c3 -= 0x81; c4 -= 0x30;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000286
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000287 if (c < 4) { /* U+0080 - U+FFFF */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200288 lseq = ((Py_UCS4)c * 10 + c2) * 1260 +
289 (Py_UCS4)c3 * 10 + c4;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000290 if (lseq < 39420) {
291 for (utr = gb18030_to_unibmp_ranges;
292 lseq >= (utr + 1)->base;
293 utr++) ;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200294 OUTCHAR(utr->first - utr->base + lseq);
295 NEXT_IN(4);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 continue;
297 }
298 }
299 else if (c >= 15) { /* U+10000 - U+10FFFF */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200300 lseq = 0x10000 + (((Py_UCS4)c-15) * 10 + c2)
301 * 1260 + (Py_UCS4)c3 * 10 + c4;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 if (lseq <= 0x10FFFF) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200303 OUTCHAR(lseq);
304 NEXT_IN(4);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 continue;
306 }
307 }
Victor Stinner2cded9c2011-07-08 01:45:13 +0200308 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000310
Victor Stinnera0dd0212013-04-11 22:09:04 +0200311 GBK_DECODE(c, c2, writer)
Victor Stinner11bdf912013-10-28 23:18:39 +0100312 else if (TRYMAP_DEC(gb18030ext, decoded, c, c2))
313 OUTCHAR(decoded);
Victor Stinner146a2ed2013-10-29 00:09:41 +0100314 else
315 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000316
Victor Stinnera0dd0212013-04-11 22:09:04 +0200317 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000319
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000321}
322
323
/*
 * HZ codec
 */
327
/* Start the HZ encoder in ASCII mode (state->i == 0).  Always returns 0. */
ENCODER_INIT(hz)
{
    state->i = 0;
    return 0;
}
333
334ENCODER_RESET(hz)
335{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000336 if (state->i != 0) {
Victor Stinner14c9fea2013-10-29 00:19:27 +0100337 WRITEBYTE2('~', '}');
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000338 state->i = 0;
Victor Stinnerd9491262013-04-14 02:06:32 +0200339 NEXT_OUT(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 }
341 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000342}
343
344ENCODER(hz)
345{
Victor Stinnerd9491262013-04-14 02:06:32 +0200346 while (*inpos < inlen) {
347 Py_UCS4 c = INCHAR1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 DBCHAR code;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000349
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 if (c < 0x80) {
351 if (state->i == 0) {
Victor Stinner14c9fea2013-10-29 00:19:27 +0100352 WRITEBYTE1((unsigned char)c);
Victor Stinnerd9491262013-04-14 02:06:32 +0200353 NEXT(1, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000354 }
355 else {
Victor Stinner14c9fea2013-10-29 00:19:27 +0100356 WRITEBYTE3('~', '}', (unsigned char)c);
Victor Stinnerd9491262013-04-14 02:06:32 +0200357 NEXT(1, 3);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 state->i = 0;
359 }
360 continue;
361 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000362
Victor Stinnerd9491262013-04-14 02:06:32 +0200363 if (c > 0xFFFF)
364 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000365
Victor Stinner146a2ed2013-10-29 00:09:41 +0100366 if (TRYMAP_ENC(gbcommon, code, c))
367 ;
368 else
369 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 if (code & 0x8000) /* MSB set: GBK */
372 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000373
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 if (state->i == 0) {
Victor Stinner14c9fea2013-10-29 00:19:27 +0100375 WRITEBYTE4('~', '{', code >> 8, code & 0xff);
Victor Stinnerd9491262013-04-14 02:06:32 +0200376 NEXT(1, 4);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 state->i = 1;
378 }
379 else {
Victor Stinner14c9fea2013-10-29 00:19:27 +0100380 WRITEBYTE2(code >> 8, code & 0xff);
Victor Stinnerd9491262013-04-14 02:06:32 +0200381 NEXT(1, 2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000382 }
383 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000385 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000386}
387
/* Start the HZ decoder in ASCII mode (state->i == 0).  Always returns 0. */
DECODER_INIT(hz)
{
    state->i = 0;
    return 0;
}
393
/* Put the HZ decoder back into ASCII mode (state->i == 0).  Always returns 0. */
DECODER_RESET(hz)
{
    state->i = 0;
    return 0;
}
399
400DECODER(hz)
401{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 while (inleft > 0) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200403 unsigned char c = INBYTE1;
Victor Stinner11bdf912013-10-28 23:18:39 +0100404 Py_UCS4 decoded;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000406 if (c == '~') {
Victor Stinnerd9491262013-04-14 02:06:32 +0200407 unsigned char c2 = INBYTE2;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000408
Victor Stinner28c63f72013-10-29 00:59:44 +0100409 REQUIRE_INBUF(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 if (c2 == '~') {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200411 OUTCHAR('~');
412 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000413 continue;
414 }
415 else if (c2 == '{' && state->i == 0)
416 state->i = 1; /* set GB */
417 else if (c2 == '}' && state->i == 1)
418 state->i = 0; /* set ASCII */
419 else if (c2 == '\n')
420 ; /* line-continuation */
421 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200422 return 1;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200423 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000424 continue;
425 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000426
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000427 if (c & 0x80)
428 return 1;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000430 if (state->i == 0) { /* ASCII mode */
Victor Stinnera0dd0212013-04-11 22:09:04 +0200431 OUTCHAR(c);
432 NEXT_IN(1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000433 }
434 else { /* GB mode */
Victor Stinner28c63f72013-10-29 00:59:44 +0100435 REQUIRE_INBUF(2);
Victor Stinner11bdf912013-10-28 23:18:39 +0100436 if (TRYMAP_DEC(gb2312, decoded, c, INBYTE2)) {
437 OUTCHAR(decoded);
Victor Stinnera0dd0212013-04-11 22:09:04 +0200438 NEXT_IN(2);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000439 }
440 else
Victor Stinner2cded9c2011-07-08 01:45:13 +0200441 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000442 }
443 }
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000444
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 return 0;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000446}
447
448
/*
 * Mapping tables used by the codecs in this file.  Going by the macro
 * names, gb2312 and gbkext are registered for decoding only, gbcommon
 * for encoding only, and gb18030ext for both directions.
 * NOTE(review): the macros are defined outside this file -- confirm
 * their exact effect in cjkcodecs.h.
 */
BEGIN_MAPPINGS_LIST
  MAPPING_DECONLY(gb2312)
  MAPPING_DECONLY(gbkext)
  MAPPING_ENCONLY(gbcommon)
  MAPPING_ENCDEC(gb18030ext)
END_MAPPINGS_LIST
455
/*
 * Codecs provided by this file.  gb2312, gbk and gb18030 are listed as
 * stateless; hz is listed as stateful, matching the INIT/RESET
 * functions and the state->i mode flag used by its encoder and decoder.
 */
BEGIN_CODECS_LIST
  CODEC_STATELESS(gb2312)
  CODEC_STATELESS(gbk)
  CODEC_STATELESS(gb18030)
  CODEC_STATEFUL(hz)
END_CODECS_LIST
462
/* Module boilerplate for the "cn" codec collection.
 * NOTE(review): the macro is defined outside this file -- presumably in
 * cjkcodecs.h; confirm what it generates there. */
I_AM_A_MODULE_FOR(cn)