/*
 * linux/fs/hfsplus/unicode.c
 *
 * Copyright (C) 2001
 * Brad Boyer (flar@allandria.com)
 * (C) 2003 Ardis Technologies <roman@ardistech.com>
 *
 * Handler routines for unicode strings
 */
10
11#include <linux/types.h>
12#include <linux/nls.h>
13#include "hfsplus_fs.h"
14#include "hfsplus_raw.h"
15
16/* Fold the case of a unicode char, given the 16 bit value */
17/* Returns folded char, or 0 if ignorable */
18static inline u16 case_fold(u16 c)
19{
Anton Salikhmetov20b76432010-12-16 18:08:40 +020020 u16 tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -070021
Anton Salikhmetov20b76432010-12-16 18:08:40 +020022 tmp = hfsplus_case_fold_table[c >> 8];
23 if (tmp)
24 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
25 else
26 tmp = c;
27 return tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -070028}
29
30/* Compare unicode strings, return values like normal strcmp */
David Elliott2179d372006-01-18 17:43:08 -080031int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
32 const struct hfsplus_unistr *s2)
Linus Torvalds1da177e2005-04-16 15:20:36 -070033{
34 u16 len1, len2, c1, c2;
35 const hfsplus_unichr *p1, *p2;
36
37 len1 = be16_to_cpu(s1->length);
38 len2 = be16_to_cpu(s2->length);
39 p1 = s1->unicode;
40 p2 = s2->unicode;
41
42 while (1) {
43 c1 = c2 = 0;
44
45 while (len1 && !c1) {
46 c1 = case_fold(be16_to_cpu(*p1));
47 p1++;
48 len1--;
49 }
50 while (len2 && !c2) {
51 c2 = case_fold(be16_to_cpu(*p2));
52 p2++;
53 len2--;
54 }
55
56 if (c1 != c2)
57 return (c1 < c2) ? -1 : 1;
58 if (!c1 && !c2)
59 return 0;
60 }
61}
62
David Elliott2179d372006-01-18 17:43:08 -080063/* Compare names as a sequence of 16-bit unsigned integers */
64int hfsplus_strcmp(const struct hfsplus_unistr *s1,
65 const struct hfsplus_unistr *s2)
66{
67 u16 len1, len2, c1, c2;
68 const hfsplus_unichr *p1, *p2;
69 int len;
70
71 len1 = be16_to_cpu(s1->length);
72 len2 = be16_to_cpu(s2->length);
73 p1 = s1->unicode;
74 p2 = s2->unicode;
75
76 for (len = min(len1, len2); len > 0; len--) {
77 c1 = be16_to_cpu(*p1);
78 c2 = be16_to_cpu(*p2);
79 if (c1 != c2)
80 return c1 < c2 ? -1 : 1;
81 p1++;
82 p2++;
83 }
84
85 return len1 < len2 ? -1 :
86 len1 > len2 ? 1 : 0;
87}
88
89
/*
 * Constants for the arithmetic composition of Hangul syllables done in
 * hfsplus_uni2asc(): a leading (L) unit, a vowel (V) unit and an optional
 * trailing (T) unit are combined into one syllable (S) value.
 * The *Base values are the first value of each range, the *Count values
 * the number of values in it; NCount is the number of V/T combinations
 * per leading unit.
 */
#define Hangul_SBase	0xac00
#define Hangul_LBase	0x1100
#define Hangul_VBase	0x1161
#define Hangul_TBase	0x11a7
#define Hangul_SCount	11172
#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
99
100
101static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
102{
103 int i, s, e;
104
105 s = 1;
106 e = p[1];
107 if (!e || cc < p[s * 2] || cc > p[e * 2])
108 return NULL;
109 do {
110 i = (s + e) / 2;
111 if (cc > p[i * 2])
112 s = i + 1;
113 else if (cc < p[i * 2])
114 e = i - 1;
115 else
116 return hfsplus_compose_table + p[i * 2 + 1];
117 } while (s <= e);
118 return NULL;
119}
120
Anton Salikhmetov2753cc22010-12-16 18:08:38 +0200121int hfsplus_uni2asc(struct super_block *sb,
122 const struct hfsplus_unistr *ustr,
123 char *astr, int *len_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124{
125 const hfsplus_unichr *ip;
Christoph Hellwigdd73a012010-10-01 05:42:59 +0200126 struct nls_table *nls = HFSPLUS_SB(sb)->nls;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127 u8 *op;
128 u16 cc, c0, c1;
129 u16 *ce1, *ce2;
130 int i, len, ustrlen, res, compose;
131
132 op = astr;
133 ip = ustr->unicode;
134 ustrlen = be16_to_cpu(ustr->length);
135 len = *len_p;
136 ce1 = NULL;
Christoph Hellwig84adede2010-10-01 05:45:20 +0200137 compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138
139 while (ustrlen > 0) {
140 c0 = be16_to_cpu(*ip++);
141 ustrlen--;
142 /* search for single decomposed char */
143 if (likely(compose))
144 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200145 if (ce1)
146 cc = ce1[0];
147 else
148 cc = 0;
149 if (cc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150 /* start of a possibly decomposed Hangul char */
151 if (cc != 0xffff)
152 goto done;
153 if (!ustrlen)
154 goto same;
155 c1 = be16_to_cpu(*ip) - Hangul_VBase;
156 if (c1 < Hangul_VCount) {
157 /* compose the Hangul char */
158 cc = (c0 - Hangul_LBase) * Hangul_VCount;
159 cc = (cc + c1) * Hangul_TCount;
160 cc += Hangul_SBase;
161 ip++;
162 ustrlen--;
163 if (!ustrlen)
164 goto done;
165 c1 = be16_to_cpu(*ip) - Hangul_TBase;
166 if (c1 > 0 && c1 < Hangul_TCount) {
167 cc += c1;
168 ip++;
169 ustrlen--;
170 }
171 goto done;
172 }
173 }
174 while (1) {
175 /* main loop for common case of not composed chars */
176 if (!ustrlen)
177 goto same;
178 c1 = be16_to_cpu(*ip);
179 if (likely(compose))
Anton Salikhmetov2753cc22010-12-16 18:08:38 +0200180 ce1 = hfsplus_compose_lookup(
181 hfsplus_compose_table, c1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 if (ce1)
183 break;
184 switch (c0) {
185 case 0:
186 c0 = 0x2400;
187 break;
188 case '/':
189 c0 = ':';
190 break;
191 }
192 res = nls->uni2char(c0, op, len);
193 if (res < 0) {
194 if (res == -ENAMETOOLONG)
195 goto out;
196 *op = '?';
197 res = 1;
198 }
199 op += res;
200 len -= res;
201 c0 = c1;
202 ip++;
203 ustrlen--;
204 }
205 ce2 = hfsplus_compose_lookup(ce1, c0);
206 if (ce2) {
207 i = 1;
208 while (i < ustrlen) {
Anton Salikhmetov2753cc22010-12-16 18:08:38 +0200209 ce1 = hfsplus_compose_lookup(ce2,
210 be16_to_cpu(ip[i]));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 if (!ce1)
212 break;
213 i++;
214 ce2 = ce1;
215 }
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200216 cc = ce2[0];
217 if (cc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218 ip += i;
219 ustrlen -= i;
220 goto done;
221 }
222 }
Anton Salikhmetov20b76432010-12-16 18:08:40 +0200223same:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224 switch (c0) {
225 case 0:
226 cc = 0x2400;
227 break;
228 case '/':
229 cc = ':';
230 break;
231 default:
232 cc = c0;
233 }
Anton Salikhmetov20b76432010-12-16 18:08:40 +0200234done:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235 res = nls->uni2char(cc, op, len);
236 if (res < 0) {
237 if (res == -ENAMETOOLONG)
238 goto out;
239 *op = '?';
240 res = 1;
241 }
242 op += res;
243 len -= res;
244 }
245 res = 0;
246out:
247 *len_p = (char *)op - astr;
248 return res;
249}
250
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700251/*
252 * Convert one or more ASCII characters into a single unicode character.
253 * Returns the number of ASCII characters corresponding to the unicode char.
254 */
255static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
256 wchar_t *uc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257{
Christoph Hellwigdd73a012010-10-01 05:42:59 +0200258 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700259 if (size <= 0) {
260 *uc = '?';
261 size = 1;
262 }
263 switch (*uc) {
264 case 0x2400:
265 *uc = 0;
266 break;
267 case ':':
268 *uc = '/';
269 break;
270 }
271 return size;
272}
273
274/* Decomposes a single unicode character. */
275static inline u16 *decompose_unichar(wchar_t uc, int *size)
276{
277 int off;
278
279 off = hfsplus_decompose_table[(uc >> 12) & 0xf];
280 if (off == 0 || off == 0xffff)
281 return NULL;
282
283 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
284 if (!off)
285 return NULL;
286
287 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
288 if (!off)
289 return NULL;
290
291 off = hfsplus_decompose_table[off + (uc & 0xf)];
292 *size = off & 3;
293 if (*size == 0)
294 return NULL;
295 return hfsplus_decompose_table + (off / 4);
296}
297
Vyacheslav Dubeyko324ef392013-02-27 17:03:04 -0800298int hfsplus_asc2uni(struct super_block *sb,
299 struct hfsplus_unistr *ustr, int max_unistr_len,
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700300 const char *astr, int len)
301{
302 int size, dsize, decompose;
303 u16 *dstr, outlen = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304 wchar_t c;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305
Christoph Hellwig84adede2010-10-01 05:45:20 +0200306 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
Vyacheslav Dubeyko324ef392013-02-27 17:03:04 -0800307 while (outlen < max_unistr_len && len > 0) {
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700308 size = asc2unichar(sb, astr, len, &c);
309
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200310 if (decompose)
311 dstr = decompose_unichar(c, &dsize);
312 else
313 dstr = NULL;
314 if (dstr) {
Vyacheslav Dubeyko324ef392013-02-27 17:03:04 -0800315 if (outlen + dsize > max_unistr_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 break;
317 do {
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700318 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
319 } while (--dsize > 0);
320 } else
321 ustr->unicode[outlen++] = cpu_to_be16(c);
322
323 astr += size;
324 len -= size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 }
326 ustr->length = cpu_to_be16(outlen);
327 if (len > 0)
328 return -ENAMETOOLONG;
329 return 0;
330}
Duane Griffind45bce82007-07-15 23:41:23 -0700331
332/*
333 * Hash a string to an integer as appropriate for the HFS+ filesystem.
334 * Composed unicode characters are decomposed and case-folding is performed
335 * if the appropriate bits are (un)set on the superblock.
336 */
Linus Torvaldsda53be12013-05-21 15:22:44 -0700337int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
Duane Griffind45bce82007-07-15 23:41:23 -0700338{
339 struct super_block *sb = dentry->d_sb;
340 const char *astr;
341 const u16 *dstr;
Andrew Morton8aa84ab2008-02-08 04:20:52 -0800342 int casefold, decompose, size, len;
Duane Griffind45bce82007-07-15 23:41:23 -0700343 unsigned long hash;
344 wchar_t c;
345 u16 c2;
346
Christoph Hellwig84adede2010-10-01 05:45:20 +0200347 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
348 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
Linus Torvalds8387ff22016-06-10 07:51:30 -0700349 hash = init_name_hash(dentry);
Duane Griffind45bce82007-07-15 23:41:23 -0700350 astr = str->name;
351 len = str->len;
352 while (len > 0) {
Andrew Morton8aa84ab2008-02-08 04:20:52 -0800353 int uninitialized_var(dsize);
Duane Griffind45bce82007-07-15 23:41:23 -0700354 size = asc2unichar(sb, astr, len, &c);
355 astr += size;
356 len -= size;
357
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200358 if (decompose)
359 dstr = decompose_unichar(c, &dsize);
360 else
361 dstr = NULL;
362 if (dstr) {
Duane Griffind45bce82007-07-15 23:41:23 -0700363 do {
364 c2 = *dstr++;
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200365 if (casefold)
366 c2 = case_fold(c2);
367 if (!casefold || c2)
Duane Griffind45bce82007-07-15 23:41:23 -0700368 hash = partial_name_hash(c2, hash);
369 } while (--dsize > 0);
370 } else {
371 c2 = c;
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200372 if (casefold)
373 c2 = case_fold(c2);
374 if (!casefold || c2)
Duane Griffind45bce82007-07-15 23:41:23 -0700375 hash = partial_name_hash(c2, hash);
376 }
377 }
378 str->hash = end_name_hash(hash);
379
380 return 0;
381}
382
383/*
384 * Compare strings with HFS+ filename ordering.
385 * Composed unicode characters are decomposed and case-folding is performed
386 * if the appropriate bits are (un)set on the superblock.
387 */
Al Viro6fa67e72016-07-31 16:37:25 -0400388int hfsplus_compare_dentry(const struct dentry *dentry,
Nick Piggin621e1552011-01-07 17:49:27 +1100389 unsigned int len, const char *str, const struct qstr *name)
Duane Griffind45bce82007-07-15 23:41:23 -0700390{
Al Virod3fe1982016-07-29 18:23:59 -0400391 struct super_block *sb = dentry->d_sb;
Duane Griffind45bce82007-07-15 23:41:23 -0700392 int casefold, decompose, size;
393 int dsize1, dsize2, len1, len2;
394 const u16 *dstr1, *dstr2;
395 const char *astr1, *astr2;
396 u16 c1, c2;
397 wchar_t c;
398
Christoph Hellwig84adede2010-10-01 05:45:20 +0200399 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
400 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
Nick Piggin621e1552011-01-07 17:49:27 +1100401 astr1 = str;
402 len1 = len;
403 astr2 = name->name;
404 len2 = name->len;
Duane Griffind45bce82007-07-15 23:41:23 -0700405 dsize1 = dsize2 = 0;
406 dstr1 = dstr2 = NULL;
407
408 while (len1 > 0 && len2 > 0) {
409 if (!dsize1) {
410 size = asc2unichar(sb, astr1, len1, &c);
411 astr1 += size;
412 len1 -= size;
413
Anton Salikhmetov2753cc22010-12-16 18:08:38 +0200414 if (decompose)
415 dstr1 = decompose_unichar(c, &dsize1);
416 if (!decompose || !dstr1) {
Duane Griffind45bce82007-07-15 23:41:23 -0700417 c1 = c;
418 dstr1 = &c1;
419 dsize1 = 1;
420 }
421 }
422
423 if (!dsize2) {
424 size = asc2unichar(sb, astr2, len2, &c);
425 astr2 += size;
426 len2 -= size;
427
Anton Salikhmetov2753cc22010-12-16 18:08:38 +0200428 if (decompose)
429 dstr2 = decompose_unichar(c, &dsize2);
430 if (!decompose || !dstr2) {
Duane Griffind45bce82007-07-15 23:41:23 -0700431 c2 = c;
432 dstr2 = &c2;
433 dsize2 = 1;
434 }
435 }
436
437 c1 = *dstr1;
438 c2 = *dstr2;
439 if (casefold) {
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200440 c1 = case_fold(c1);
441 if (!c1) {
Duane Griffind45bce82007-07-15 23:41:23 -0700442 dstr1++;
443 dsize1--;
444 continue;
445 }
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200446 c2 = case_fold(c2);
447 if (!c2) {
Duane Griffind45bce82007-07-15 23:41:23 -0700448 dstr2++;
449 dsize2--;
450 continue;
451 }
452 }
453 if (c1 < c2)
454 return -1;
455 else if (c1 > c2)
456 return 1;
457
458 dstr1++;
459 dsize1--;
460 dstr2++;
461 dsize2--;
462 }
463
464 if (len1 < len2)
465 return -1;
466 if (len1 > len2)
467 return 1;
468 return 0;
469}