/*
 * linux/fs/hfsplus/unicode.c
 *
 * Copyright (C) 2001
 * Brad Boyer (flar@allandria.com)
 * (C) 2003 Ardis Technologies <roman@ardistech.com>
 *
 * Handler routines for unicode strings
 */
10
11#include <linux/types.h>
12#include <linux/nls.h>
13#include "hfsplus_fs.h"
14#include "hfsplus_raw.h"
15
16/* Fold the case of a unicode char, given the 16 bit value */
17/* Returns folded char, or 0 if ignorable */
18static inline u16 case_fold(u16 c)
19{
20 u16 tmp;
21
22 tmp = hfsplus_case_fold_table[c >> 8];
23 if (tmp)
24 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
25 else
26 tmp = c;
27 return tmp;
28}
29
30/* Compare unicode strings, return values like normal strcmp */
David Elliott2179d372006-01-18 17:43:08 -080031int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
32 const struct hfsplus_unistr *s2)
Linus Torvalds1da177e2005-04-16 15:20:36 -070033{
34 u16 len1, len2, c1, c2;
35 const hfsplus_unichr *p1, *p2;
36
37 len1 = be16_to_cpu(s1->length);
38 len2 = be16_to_cpu(s2->length);
39 p1 = s1->unicode;
40 p2 = s2->unicode;
41
42 while (1) {
43 c1 = c2 = 0;
44
45 while (len1 && !c1) {
46 c1 = case_fold(be16_to_cpu(*p1));
47 p1++;
48 len1--;
49 }
50 while (len2 && !c2) {
51 c2 = case_fold(be16_to_cpu(*p2));
52 p2++;
53 len2--;
54 }
55
56 if (c1 != c2)
57 return (c1 < c2) ? -1 : 1;
58 if (!c1 && !c2)
59 return 0;
60 }
61}
62
David Elliott2179d372006-01-18 17:43:08 -080063/* Compare names as a sequence of 16-bit unsigned integers */
64int hfsplus_strcmp(const struct hfsplus_unistr *s1,
65 const struct hfsplus_unistr *s2)
66{
67 u16 len1, len2, c1, c2;
68 const hfsplus_unichr *p1, *p2;
69 int len;
70
71 len1 = be16_to_cpu(s1->length);
72 len2 = be16_to_cpu(s2->length);
73 p1 = s1->unicode;
74 p2 = s2->unicode;
75
76 for (len = min(len1, len2); len > 0; len--) {
77 c1 = be16_to_cpu(*p1);
78 c2 = be16_to_cpu(*p2);
79 if (c1 != c2)
80 return c1 < c2 ? -1 : 1;
81 p1++;
82 p2++;
83 }
84
85 return len1 < len2 ? -1 :
86 len1 > len2 ? 1 : 0;
87}
88
89
/*
 * Constants used by hfsplus_uni2asc() to compose a Hangul syllable
 * arithmetically from its conjoining jamo: the *Base values are the first
 * code point of each range, the *Count values the size of each range.
 */
#define Hangul_SBase	0xac00
#define Hangul_LBase	0x1100
#define Hangul_VBase	0x1161
#define Hangul_TBase	0x11a7
#define Hangul_SCount	11172
#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
99
100
101static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
102{
103 int i, s, e;
104
105 s = 1;
106 e = p[1];
107 if (!e || cc < p[s * 2] || cc > p[e * 2])
108 return NULL;
109 do {
110 i = (s + e) / 2;
111 if (cc > p[i * 2])
112 s = i + 1;
113 else if (cc < p[i * 2])
114 e = i - 1;
115 else
116 return hfsplus_compose_table + p[i * 2 + 1];
117 } while (s <= e);
118 return NULL;
119}
120
121int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p)
122{
123 const hfsplus_unichr *ip;
Christoph Hellwigdd73a012010-10-01 05:42:59 +0200124 struct nls_table *nls = HFSPLUS_SB(sb)->nls;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125 u8 *op;
126 u16 cc, c0, c1;
127 u16 *ce1, *ce2;
128 int i, len, ustrlen, res, compose;
129
130 op = astr;
131 ip = ustr->unicode;
132 ustrlen = be16_to_cpu(ustr->length);
133 len = *len_p;
134 ce1 = NULL;
Christoph Hellwig84adede2010-10-01 05:45:20 +0200135 compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136
137 while (ustrlen > 0) {
138 c0 = be16_to_cpu(*ip++);
139 ustrlen--;
140 /* search for single decomposed char */
141 if (likely(compose))
142 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
143 if (ce1 && (cc = ce1[0])) {
144 /* start of a possibly decomposed Hangul char */
145 if (cc != 0xffff)
146 goto done;
147 if (!ustrlen)
148 goto same;
149 c1 = be16_to_cpu(*ip) - Hangul_VBase;
150 if (c1 < Hangul_VCount) {
151 /* compose the Hangul char */
152 cc = (c0 - Hangul_LBase) * Hangul_VCount;
153 cc = (cc + c1) * Hangul_TCount;
154 cc += Hangul_SBase;
155 ip++;
156 ustrlen--;
157 if (!ustrlen)
158 goto done;
159 c1 = be16_to_cpu(*ip) - Hangul_TBase;
160 if (c1 > 0 && c1 < Hangul_TCount) {
161 cc += c1;
162 ip++;
163 ustrlen--;
164 }
165 goto done;
166 }
167 }
168 while (1) {
169 /* main loop for common case of not composed chars */
170 if (!ustrlen)
171 goto same;
172 c1 = be16_to_cpu(*ip);
173 if (likely(compose))
174 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c1);
175 if (ce1)
176 break;
177 switch (c0) {
178 case 0:
179 c0 = 0x2400;
180 break;
181 case '/':
182 c0 = ':';
183 break;
184 }
185 res = nls->uni2char(c0, op, len);
186 if (res < 0) {
187 if (res == -ENAMETOOLONG)
188 goto out;
189 *op = '?';
190 res = 1;
191 }
192 op += res;
193 len -= res;
194 c0 = c1;
195 ip++;
196 ustrlen--;
197 }
198 ce2 = hfsplus_compose_lookup(ce1, c0);
199 if (ce2) {
200 i = 1;
201 while (i < ustrlen) {
202 ce1 = hfsplus_compose_lookup(ce2, be16_to_cpu(ip[i]));
203 if (!ce1)
204 break;
205 i++;
206 ce2 = ce1;
207 }
208 if ((cc = ce2[0])) {
209 ip += i;
210 ustrlen -= i;
211 goto done;
212 }
213 }
214 same:
215 switch (c0) {
216 case 0:
217 cc = 0x2400;
218 break;
219 case '/':
220 cc = ':';
221 break;
222 default:
223 cc = c0;
224 }
225 done:
226 res = nls->uni2char(cc, op, len);
227 if (res < 0) {
228 if (res == -ENAMETOOLONG)
229 goto out;
230 *op = '?';
231 res = 1;
232 }
233 op += res;
234 len -= res;
235 }
236 res = 0;
237out:
238 *len_p = (char *)op - astr;
239 return res;
240}
241
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700242/*
243 * Convert one or more ASCII characters into a single unicode character.
244 * Returns the number of ASCII characters corresponding to the unicode char.
245 */
246static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
247 wchar_t *uc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248{
Christoph Hellwigdd73a012010-10-01 05:42:59 +0200249 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700250 if (size <= 0) {
251 *uc = '?';
252 size = 1;
253 }
254 switch (*uc) {
255 case 0x2400:
256 *uc = 0;
257 break;
258 case ':':
259 *uc = '/';
260 break;
261 }
262 return size;
263}
264
265/* Decomposes a single unicode character. */
266static inline u16 *decompose_unichar(wchar_t uc, int *size)
267{
268 int off;
269
270 off = hfsplus_decompose_table[(uc >> 12) & 0xf];
271 if (off == 0 || off == 0xffff)
272 return NULL;
273
274 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
275 if (!off)
276 return NULL;
277
278 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
279 if (!off)
280 return NULL;
281
282 off = hfsplus_decompose_table[off + (uc & 0xf)];
283 *size = off & 3;
284 if (*size == 0)
285 return NULL;
286 return hfsplus_decompose_table + (off / 4);
287}
288
289int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
290 const char *astr, int len)
291{
292 int size, dsize, decompose;
293 u16 *dstr, outlen = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 wchar_t c;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295
Christoph Hellwig84adede2010-10-01 05:45:20 +0200296 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297 while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700298 size = asc2unichar(sb, astr, len, &c);
299
300 if (decompose && (dstr = decompose_unichar(c, &dsize))) {
301 if (outlen + dsize > HFSPLUS_MAX_STRLEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 break;
303 do {
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700304 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
305 } while (--dsize > 0);
306 } else
307 ustr->unicode[outlen++] = cpu_to_be16(c);
308
309 astr += size;
310 len -= size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311 }
312 ustr->length = cpu_to_be16(outlen);
313 if (len > 0)
314 return -ENAMETOOLONG;
315 return 0;
316}
Duane Griffind45bce82007-07-15 23:41:23 -0700317
318/*
319 * Hash a string to an integer as appropriate for the HFS+ filesystem.
320 * Composed unicode characters are decomposed and case-folding is performed
321 * if the appropriate bits are (un)set on the superblock.
322 */
323int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
324{
325 struct super_block *sb = dentry->d_sb;
326 const char *astr;
327 const u16 *dstr;
Andrew Morton8aa84ab2008-02-08 04:20:52 -0800328 int casefold, decompose, size, len;
Duane Griffind45bce82007-07-15 23:41:23 -0700329 unsigned long hash;
330 wchar_t c;
331 u16 c2;
332
Christoph Hellwig84adede2010-10-01 05:45:20 +0200333 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
334 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
Duane Griffind45bce82007-07-15 23:41:23 -0700335 hash = init_name_hash();
336 astr = str->name;
337 len = str->len;
338 while (len > 0) {
Andrew Morton8aa84ab2008-02-08 04:20:52 -0800339 int uninitialized_var(dsize);
Duane Griffind45bce82007-07-15 23:41:23 -0700340 size = asc2unichar(sb, astr, len, &c);
341 astr += size;
342 len -= size;
343
344 if (decompose && (dstr = decompose_unichar(c, &dsize))) {
345 do {
346 c2 = *dstr++;
347 if (!casefold || (c2 = case_fold(c2)))
348 hash = partial_name_hash(c2, hash);
349 } while (--dsize > 0);
350 } else {
351 c2 = c;
352 if (!casefold || (c2 = case_fold(c2)))
353 hash = partial_name_hash(c2, hash);
354 }
355 }
356 str->hash = end_name_hash(hash);
357
358 return 0;
359}
360
361/*
362 * Compare strings with HFS+ filename ordering.
363 * Composed unicode characters are decomposed and case-folding is performed
364 * if the appropriate bits are (un)set on the superblock.
365 */
366int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2)
367{
368 struct super_block *sb = dentry->d_sb;
369 int casefold, decompose, size;
370 int dsize1, dsize2, len1, len2;
371 const u16 *dstr1, *dstr2;
372 const char *astr1, *astr2;
373 u16 c1, c2;
374 wchar_t c;
375
Christoph Hellwig84adede2010-10-01 05:45:20 +0200376 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
377 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
Duane Griffind45bce82007-07-15 23:41:23 -0700378 astr1 = s1->name;
379 len1 = s1->len;
380 astr2 = s2->name;
381 len2 = s2->len;
382 dsize1 = dsize2 = 0;
383 dstr1 = dstr2 = NULL;
384
385 while (len1 > 0 && len2 > 0) {
386 if (!dsize1) {
387 size = asc2unichar(sb, astr1, len1, &c);
388 astr1 += size;
389 len1 -= size;
390
391 if (!decompose || !(dstr1 = decompose_unichar(c, &dsize1))) {
392 c1 = c;
393 dstr1 = &c1;
394 dsize1 = 1;
395 }
396 }
397
398 if (!dsize2) {
399 size = asc2unichar(sb, astr2, len2, &c);
400 astr2 += size;
401 len2 -= size;
402
403 if (!decompose || !(dstr2 = decompose_unichar(c, &dsize2))) {
404 c2 = c;
405 dstr2 = &c2;
406 dsize2 = 1;
407 }
408 }
409
410 c1 = *dstr1;
411 c2 = *dstr2;
412 if (casefold) {
413 if (!(c1 = case_fold(c1))) {
414 dstr1++;
415 dsize1--;
416 continue;
417 }
418 if (!(c2 = case_fold(c2))) {
419 dstr2++;
420 dsize2--;
421 continue;
422 }
423 }
424 if (c1 < c2)
425 return -1;
426 else if (c1 > c2)
427 return 1;
428
429 dstr1++;
430 dsize1--;
431 dstr2++;
432 dsize2--;
433 }
434
435 if (len1 < len2)
436 return -1;
437 if (len1 > len2)
438 return 1;
439 return 0;
440}