Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 1 | /* |
| 2 | * _codecs_hk.c: Codecs collection for encodings from Hong Kong |
| 3 | * |
| 4 | * Written by Hye-Shik Chang <perky@FreeBSD.org> |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 5 | */ |
| 6 | |
| 7 | #define USING_IMPORTED_MAPS |
| 8 | |
| 9 | #include "cjkcodecs.h" |
| 10 | #include "mappings_hk.h" |
| 11 | |
| 12 | /* |
| 13 | * BIG5HKSCS codec |
| 14 | */ |
| 15 | |
| 16 | static const encode_map *big5_encmap = NULL; |
| 17 | static const decode_map *big5_decmap = NULL; |
| 18 | |
| 19 | CODEC_INIT(big5hkscs) |
| 20 | { |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 21 | static int initialized = 0; |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 22 | |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 23 | if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap)) |
| 24 | return -1; |
| 25 | initialized = 1; |
| 26 | return 0; |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 27 | } |
| 28 | |
Christian Heimes | 77c02eb | 2008-02-09 02:18:51 +0000 | [diff] [blame] | 29 | /* |
| 30 | * There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004: |
| 31 | * U+00CA U+0304 -> 8862 (U+00CA alone is mapped to 8866) |
| 32 | * U+00CA U+030C -> 8864 |
| 33 | * U+00EA U+0304 -> 88a3 (U+00EA alone is mapped to 88a7) |
| 34 | * U+00EA U+030C -> 88a5 |
| 35 | * These are handled by not mapping tables but a hand-written code. |
| 36 | */ |
| 37 | static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5}; |
| 38 | |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 39 | ENCODER(big5hkscs) |
| 40 | { |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 41 | while (inleft > 0) { |
| 42 | ucs4_t c = **inbuf; |
| 43 | DBCHAR code; |
| 44 | Py_ssize_t insize; |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 45 | |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 46 | if (c < 0x80) { |
| 47 | REQUIRE_OUTBUF(1) |
| 48 | **outbuf = (unsigned char)c; |
| 49 | NEXT(1, 1) |
| 50 | continue; |
| 51 | } |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 52 | |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 53 | DECODE_SURROGATE(c) |
| 54 | insize = GET_INSIZE(c); |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 55 | |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 56 | REQUIRE_OUTBUF(2) |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 57 | |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 58 | if (c < 0x10000) { |
| 59 | TRYMAP_ENC(big5hkscs_bmp, code, c) { |
| 60 | if (code == MULTIC) { |
| 61 | if (inleft >= 2 && |
| 62 | ((c & 0xffdf) == 0x00ca) && |
| 63 | (((*inbuf)[1] & 0xfff7) == 0x0304)) { |
| 64 | code = big5hkscs_pairenc_table[ |
| 65 | ((c >> 4) | |
| 66 | ((*inbuf)[1] >> 3)) & 3]; |
| 67 | insize = 2; |
| 68 | } |
| 69 | else if (inleft < 2 && |
| 70 | !(flags & MBENC_FLUSH)) |
| 71 | return MBERR_TOOFEW; |
| 72 | else { |
| 73 | if (c == 0xca) |
| 74 | code = 0x8866; |
| 75 | else /* c == 0xea */ |
| 76 | code = 0x88a7; |
| 77 | } |
| 78 | } |
| 79 | } |
| 80 | else TRYMAP_ENC(big5, code, c); |
| 81 | else return 1; |
| 82 | } |
| 83 | else if (c < 0x20000) |
| 84 | return insize; |
| 85 | else if (c < 0x30000) { |
| 86 | TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff); |
| 87 | else return insize; |
| 88 | } |
| 89 | else |
| 90 | return insize; |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 91 | |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 92 | OUT1(code >> 8) |
| 93 | OUT2(code & 0xFF) |
| 94 | NEXT(insize, 2) |
| 95 | } |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 96 | |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 97 | return 0; |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 98 | } |
| 99 | |
/*
 * BH2S(c1, c2): flatten a (lead byte, trail byte) pair into a linear
 * index.  Lead byte 0x87 is row 0, trail byte 0x40 is column 0, and each
 * row spans the trail bytes 0x40..0xfe, so BH2S(0x87, 0x40) == 0.
 */
#define BH2S_LEAD_BASE   0x87
#define BH2S_TRAIL_BASE  0x40
#define BH2S_TRAIL_LAST  0xfe
#define BH2S_ROW_WIDTH   (BH2S_TRAIL_LAST - BH2S_TRAIL_BASE + 1)

#define BH2S(c1, c2) \
    (((c1) - BH2S_LEAD_BASE) * BH2S_ROW_WIDTH + ((c2) - BH2S_TRAIL_BASE))
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 101 | |
| 102 | DECODER(big5hkscs) |
| 103 | { |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 104 | while (inleft > 0) { |
| 105 | unsigned char c = IN1; |
| 106 | ucs4_t decoded; |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 107 | |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 108 | REQUIRE_OUTBUF(1) |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 109 | |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 110 | if (c < 0x80) { |
| 111 | OUT1(c) |
| 112 | NEXT(1, 1) |
| 113 | continue; |
| 114 | } |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 115 | |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 116 | REQUIRE_INBUF(2) |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 117 | |
Victor Stinner | 5dfe3bb | 2011-06-03 23:34:09 +0200 | [diff] [blame] | 118 | if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) { |
| 119 | TRYMAP_DEC(big5, **outbuf, c, IN2) { |
| 120 | NEXT(2, 1) |
| 121 | continue; |
| 122 | } |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 123 | } |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 124 | |
Victor Stinner | 5dfe3bb | 2011-06-03 23:34:09 +0200 | [diff] [blame] | 125 | TRYMAP_DEC(big5hkscs, decoded, c, IN2) |
| 126 | { |
| 127 | int s = BH2S(c, IN2); |
| 128 | const unsigned char *hintbase; |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 129 | |
Victor Stinner | 5dfe3bb | 2011-06-03 23:34:09 +0200 | [diff] [blame] | 130 | assert(0x87 <= c && c <= 0xfe); |
| 131 | assert(0x40 <= IN2 && IN2 <= 0xfe); |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 132 | |
Victor Stinner | 5dfe3bb | 2011-06-03 23:34:09 +0200 | [diff] [blame] | 133 | if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { |
| 134 | hintbase = big5hkscs_phint_0; |
| 135 | s -= BH2S(0x87, 0x40); |
| 136 | } |
| 137 | else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ |
| 138 | hintbase = big5hkscs_phint_12130; |
| 139 | s -= BH2S(0xc6, 0xa1); |
| 140 | } |
| 141 | else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ |
| 142 | hintbase = big5hkscs_phint_21924; |
| 143 | s -= BH2S(0xf9, 0xd6); |
| 144 | } |
| 145 | else |
| 146 | return MBERR_INTERNAL; |
Christian Heimes | 77c02eb | 2008-02-09 02:18:51 +0000 | [diff] [blame] | 147 | |
Victor Stinner | 5dfe3bb | 2011-06-03 23:34:09 +0200 | [diff] [blame] | 148 | if (hintbase[s >> 3] & (1 << (s & 7))) { |
| 149 | WRITEUCS4(decoded | 0x20000) |
| 150 | NEXT_IN(2) |
| 151 | } |
| 152 | else { |
| 153 | OUT1(decoded) |
| 154 | NEXT(2, 1) |
| 155 | } |
| 156 | continue; |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 157 | } |
Victor Stinner | 5dfe3bb | 2011-06-03 23:34:09 +0200 | [diff] [blame] | 158 | |
| 159 | switch ((c << 8) | IN2) { |
| 160 | case 0x8862: WRITE2(0x00ca, 0x0304); break; |
| 161 | case 0x8864: WRITE2(0x00ca, 0x030c); break; |
| 162 | case 0x88a3: WRITE2(0x00ea, 0x0304); break; |
| 163 | case 0x88a5: WRITE2(0x00ea, 0x030c); break; |
Victor Stinner | 2cded9c | 2011-07-08 01:45:13 +0200 | [diff] [blame^] | 164 | default: return 1; |
Victor Stinner | 5dfe3bb | 2011-06-03 23:34:09 +0200 | [diff] [blame] | 165 | } |
| 166 | |
| 167 | NEXT(2, 2) /* all decoded codepoints are pairs, above. */ |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 168 | } |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 169 | |
Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 170 | return 0; |
Hye-Shik Chang | 2bb146f | 2004-07-18 03:06:29 +0000 | [diff] [blame] | 171 | } |
| 172 | |
| 173 | |
| 174 | BEGIN_MAPPINGS_LIST |
| 175 | MAPPING_DECONLY(big5hkscs) |
| 176 | MAPPING_ENCONLY(big5hkscs_bmp) |
| 177 | MAPPING_ENCONLY(big5hkscs_nonbmp) |
| 178 | END_MAPPINGS_LIST |
| 179 | |
| 180 | BEGIN_CODECS_LIST |
| 181 | CODEC_STATELESS_WINIT(big5hkscs) |
| 182 | END_CODECS_LIST |
| 183 | |
| 184 | I_AM_A_MODULE_FOR(hk) |