/*
 * Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
18
19#include <assert.h>
20#include <string.h>
21
22#include "charset.h"
23
/*
 * Checks that every charset exercised by this file is expected to pass:
 * decoder handling of null and zero-length input, decoding of NUL and
 * plain ASCII letters, and the matching single-byte encoder cases.
 */
void test_any(struct charset *charset)
{
  int decoded;
  char out[2];

  assert(charset);

  /* Decoder without an output pointer: null or empty input gives 0. */
  assert(charset_mbtowc(charset, NULL, NULL, 0) == 0);
  assert(charset_mbtowc(charset, NULL, NULL, 1) == 0);
  /* With a length of zero even an invalid input pointer is accepted. */
  assert(charset_mbtowc(charset, NULL, (char *)(-1), 0) == 0);

  assert(charset_mbtowc(charset, NULL, "a", 0) == 0);
  assert(charset_mbtowc(charset, NULL, "", 1) == 0);
  assert(charset_mbtowc(charset, NULL, "b", 1) == 1);
  assert(charset_mbtowc(charset, NULL, "", 2) == 0);
  assert(charset_mbtowc(charset, NULL, "c", 2) == 1);

  /* Same inputs with an output pointer; a zero-length call leaves it alone. */
  decoded = 'x';
  assert(charset_mbtowc(charset, &decoded, "a", 0) == 0);
  assert(decoded == 'x');
  assert(charset_mbtowc(charset, &decoded, "", 1) == 0);
  assert(decoded == 0);
  assert(charset_mbtowc(charset, &decoded, "b", 1) == 1);
  assert(decoded == 'b');
  assert(charset_mbtowc(charset, &decoded, "", 2) == 0);
  assert(decoded == 0);
  assert(charset_mbtowc(charset, &decoded, "c", 2) == 1);
  assert(decoded == 'c');

  /* Encoder: a null output buffer gives 0. */
  assert(charset_wctomb(charset, NULL, 0) == 0);

  /* Exactly one byte is written; the sentinel after it must survive. */
  memset(out, '.', sizeof out);
  assert(charset_wctomb(charset, out, 0) == 1);
  assert(out[0] == '\0' && out[1] == '.');
  assert(charset_wctomb(charset, out, 'x') == 1);
  assert(out[0] == 'x' && out[1] == '.');
}
60
/*
 * UTF-8 specific checks, using the original 1- to 6-byte scheme that
 * covers 0 .. 0x7fffffff.
 *
 * Decoder: the shortest encoding at each length boundary is accepted,
 * while overlong forms, truncated sequences, bad continuation bytes and
 * the lead bytes 0xFE/0xFF are rejected with -1.
 * Encoder: the first and last code point of each length are encoded, and
 * a value outside 0 .. 0x7fffffff is rejected without touching the buffer.
 */
void test_utf8()
{
  struct charset *charset;
  int wc;
  char s[8];

  charset = charset_find("UTF-8");
  test_any(charset);

  /* Decoder: length boundaries and overlong forms */
  wc = 0;
  assert(charset_mbtowc(charset, &wc, "\177", 1) == 1 && wc == 127);
  assert(charset_mbtowc(charset, &wc, "\200", 2) == -1);
  assert(charset_mbtowc(charset, &wc, "\301\277", 9) == -1);
  /* A two-byte sequence with only one byte available is an error. */
  assert(charset_mbtowc(charset, &wc, "\302\200", 1) == -1);
  assert(charset_mbtowc(charset, &wc, "\302\200", 2) == 2 && wc == 128);
  assert(charset_mbtowc(charset, &wc, "\302\200", 3) == 2 && wc == 128);
  assert(charset_mbtowc(charset, &wc, "\340\237\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\240\200", 9) == 3 &&
         wc == 1 << 11);
  assert(charset_mbtowc(charset, &wc, "\360\217\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\200\200", 9) == 4 &&
         wc == 1 << 16);
  assert(charset_mbtowc(charset, &wc, "\370\207\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\370\210\200\200\200", 9) == 5 &&
         wc == 1 << 21);
  assert(charset_mbtowc(charset, &wc, "\374\203\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\374\204\200\200\200\200", 9) == 6 &&
         wc == 1 << 26);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\277", 9) == 6 &&
         wc == 0x7fffffff);

  /* Decoder: each continuation position with a byte outside 0x80..0xBF */
  assert(charset_mbtowc(charset, &wc, "\302\000", 2) == -1);
  assert(charset_mbtowc(charset, &wc, "\302\300", 2) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\040\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\340\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\240\000", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\240\300", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\020\200\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\320\200\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\000\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\300\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\200\000", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\200\300", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\077\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\377\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\077\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\377\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\077\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\377\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\077", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\377", 9) == -1);

  /* Decoder: 0xFE and 0xFF are never valid lead bytes */
  assert(charset_mbtowc(charset, &wc, "\376\277\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\377\277\277\277\277\277", 9) == -1);

  /* Encoder */
  strcpy(s, ".......");
  /*
   * Value with the high bit set, written as -0x7fffffff - 1 because
   * `1 << 31` overflows a 32-bit int, which is undefined behaviour.
   */
  assert(charset_wctomb(charset, s, -0x7fffffff - 1) == -1 &&
         !strcmp(s, "......."));
  assert(charset_wctomb(charset, s, 127) == 1 &&
         !strcmp(s, "\177......"));
  assert(charset_wctomb(charset, s, 128) == 2 &&
         !strcmp(s, "\302\200....."));
  assert(charset_wctomb(charset, s, 0x7ff) == 2 &&
         !strcmp(s, "\337\277....."));
  assert(charset_wctomb(charset, s, 0x800) == 3 &&
         !strcmp(s, "\340\240\200...."));
  assert(charset_wctomb(charset, s, 0xffff) == 3 &&
         !strcmp(s, "\357\277\277...."));
  assert(charset_wctomb(charset, s, 0x10000) == 4 &&
         !strcmp(s, "\360\220\200\200..."));
  assert(charset_wctomb(charset, s, 0x1fffff) == 4 &&
         !strcmp(s, "\367\277\277\277..."));
  assert(charset_wctomb(charset, s, 0x200000) == 5 &&
         !strcmp(s, "\370\210\200\200\200.."));
  assert(charset_wctomb(charset, s, 0x3ffffff) == 5 &&
         !strcmp(s, "\373\277\277\277\277.."));
  assert(charset_wctomb(charset, s, 0x4000000) == 6 &&
         !strcmp(s, "\374\204\200\200\200\200."));
  assert(charset_wctomb(charset, s, 0x7fffffff) == 6 &&
         !strcmp(s, "\375\277\277\277\277\277."));
}
144
/*
 * US-ASCII checks: 127 is the highest value the charset decodes or
 * encodes; anything from 128 upwards is rejected with -1.
 */
void test_ascii()
{
  struct charset *ascii = charset_find("us-ascii");
  int decoded = 0;
  char out[3];

  test_any(ascii);

  /* Decoder: byte 0x7f is accepted, byte 0x80 is not. */
  assert(charset_mbtowc(ascii, &decoded, "\177", 2) == 1);
  assert(decoded == 127);
  assert(charset_mbtowc(ascii, &decoded, "\200", 2) == -1);

  /* Encoder: a rejected value must leave the buffer untouched. */
  strcpy(out, "..");
  assert(charset_wctomb(ascii, out, 256) == -1);
  assert(strcmp(out, "..") == 0);
  assert(charset_wctomb(ascii, out, 255) == -1);
  assert(charset_wctomb(ascii, out, 128) == -1);
  assert(charset_wctomb(ascii, out, 127) == 1);
  assert(strcmp(out, "\177.") == 0);
}
166
/*
 * ISO-8859-1 checks: each byte decodes on its own to the code point of
 * the same value, and code points up to 255 encode to a single byte.
 */
void test_iso1()
{
  struct charset *latin1 = charset_find("iso-8859-1");
  int decoded = 0;
  char out[3];

  test_any(latin1);

  /* Decoder: only the first byte is consumed, giving 0xc2. */
  assert(charset_mbtowc(latin1, &decoded, "\302\200", 9) == 1);
  assert(decoded == 0xc2);

  /* Encoder: 256 is rejected and leaves the buffer unchanged. */
  strcpy(out, "..");
  assert(charset_wctomb(latin1, out, 256) == -1);
  assert(strcmp(out, "..") == 0);
  assert(charset_wctomb(latin1, out, 255) == 1);
  assert(strcmp(out, "\377.") == 0);
  assert(charset_wctomb(latin1, out, 128) == 1);
  assert(strcmp(out, "\200.") == 0);
}
186
/*
 * ISO-8859-2 checks: a table-driven single-byte charset in which some
 * bytes map to code points above 255 and some code points below 256
 * have no encoding.
 */
void test_iso2()
{
  struct charset *latin2 = charset_find("iso-8859-2");
  int decoded = 0;
  char out[3];

  test_any(latin2);

  /* Decoder: byte 0xc2 maps to itself, byte 0xff maps to 0x2d9. */
  assert(charset_mbtowc(latin2, &decoded, "\302\200", 9) == 1);
  assert(decoded == 0xc2);
  assert(charset_mbtowc(latin2, &decoded, "\377", 2) == 1);
  assert(decoded == 0x2d9);

  /* Encoder: rejected values must leave the buffer unchanged. */
  strcpy(out, "..");
  assert(charset_wctomb(latin2, out, 256) == -1);
  assert(strcmp(out, "..") == 0);
  assert(charset_wctomb(latin2, out, 255) == -1);
  assert(strcmp(out, "..") == 0);
  /* Code point 258 is representable as byte 0xc3. */
  assert(charset_wctomb(latin2, out, 258) == 1);
  assert(strcmp(out, "\303.") == 0);
  assert(charset_wctomb(latin2, out, 128) == 1);
  assert(strcmp(out, "\200.") == 0);
}
208
/*
 * Checks for charset_convert(), the whole-buffer conversion routine.
 *
 * Return values exercised here:
 *   0  every character converted exactly;
 *   1  a character had no equivalent in the target and became '?';
 *   2  the input was invalid and each bad byte became '#'.
 * Either output pointer (result buffer or result length) may be null.
 *
 * NOTE(review): the buffers returned through q and r are never released.
 * They look like they are allocated by charset_convert() -- confirm the
 * ownership rule in charset.h before adding free() calls.
 */
void test_convert()
{
  const char *p;
  char *q, *r;
  char s[256];
  size_t n, n2;
  int i;

  /*
   * p starts with a NUL byte, so strcmp() would stop at once and compare
   * nothing; memcmp() over all 10 bytes checks the round trip.
   */
  p = "\000x\302\200\375\277\277\277\277\277";
  assert(charset_convert("UTF-8", "UTF-8", p, 10, &q, &n) == 0 &&
         n == 10 && !memcmp(p, q, 10));

  /* Invalid (overlong) input: two bad bytes are replaced by '#'. */
  assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, &q, &n) == 2 &&
         n == 4 && !strcmp(q, "x##y"));
  assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, 0, &n) == 2 &&
         n == 4);
  assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, &q, 0) == 2 &&
         !strcmp(q, "x##y"));

  /* A character missing from the target charset is replaced by '?'. */
  assert(charset_convert("UTF-8", "iso-8859-1",
                         "\302\200\304\200x", 5, &q, &n) == 1 &&
         n == 3 && !strcmp(q, "\200?x"));
  assert(charset_convert("iso-8859-1", "UTF-8",
                         "\000\200\377", 3, &q, &n) == 0 &&
         n == 5 && !memcmp(q, "\000\302\200\303\277", 5));
  assert(charset_convert("iso-8859-1", "iso-8859-1",
                         "\000\200\377", 3, &q, &n) == 0 &&
         n == 3 && !memcmp(q, "\000\200\377", 3));

  assert(charset_convert("iso-8859-2", "utf-8", "\300", 1, &q, &n) == 0 &&
         n == 2 && !strcmp(q, "\305\224"));
  assert(charset_convert("utf-8", "iso-8859-2", "\305\224", 2, &q, &n) == 0 &&
         n == 1 && !strcmp(q, "\300"));

  /* Every byte value must survive iso-8859-2 -> utf-8 -> iso-8859-2. */
  for (i = 0; i < 256; i++)
    s[i] = (char)i;

  assert(charset_convert("iso-8859-2", "utf-8", s, 256, &q, &n) == 0);
  assert(charset_convert("utf-8", "iso-8859-2", q, n, &r, &n2) == 0);
  assert(n2 == 256 && !memcmp(r, s, n2));
}
248
249int main()
250{
251 test_utf8();
252 test_ascii();
253 test_iso1();
254 test_iso2();
255
256 test_convert();
257
258 return 0;
259}