blob: dfa90c21948f82a34b6eedfab2bfdd8b1c996ef3 [file] [log] [blame]
Greg Kroah-Hartmanb2441312017-11-01 15:07:57 +01001// SPDX-License-Identifier: GPL-2.0
Linus Torvalds1da177e2005-04-16 15:20:36 -07002/*
3 * linux/fs/hfsplus/unicode.c
4 *
5 * Copyright (C) 2001
6 * Brad Boyer (flar@allandria.com)
7 * (C) 2003 Ardis Technologies <roman@ardistech.com>
8 *
9 * Handler routines for unicode strings
10 */
11
12#include <linux/types.h>
13#include <linux/nls.h>
14#include "hfsplus_fs.h"
15#include "hfsplus_raw.h"
16
17/* Fold the case of a unicode char, given the 16 bit value */
18/* Returns folded char, or 0 if ignorable */
19static inline u16 case_fold(u16 c)
20{
Anton Salikhmetov20b76432010-12-16 18:08:40 +020021 u16 tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -070022
Anton Salikhmetov20b76432010-12-16 18:08:40 +020023 tmp = hfsplus_case_fold_table[c >> 8];
24 if (tmp)
25 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
26 else
27 tmp = c;
28 return tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -070029}
30
31/* Compare unicode strings, return values like normal strcmp */
David Elliott2179d372006-01-18 17:43:08 -080032int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
33 const struct hfsplus_unistr *s2)
Linus Torvalds1da177e2005-04-16 15:20:36 -070034{
35 u16 len1, len2, c1, c2;
36 const hfsplus_unichr *p1, *p2;
37
38 len1 = be16_to_cpu(s1->length);
39 len2 = be16_to_cpu(s2->length);
40 p1 = s1->unicode;
41 p2 = s2->unicode;
42
43 while (1) {
44 c1 = c2 = 0;
45
46 while (len1 && !c1) {
47 c1 = case_fold(be16_to_cpu(*p1));
48 p1++;
49 len1--;
50 }
51 while (len2 && !c2) {
52 c2 = case_fold(be16_to_cpu(*p2));
53 p2++;
54 len2--;
55 }
56
57 if (c1 != c2)
58 return (c1 < c2) ? -1 : 1;
59 if (!c1 && !c2)
60 return 0;
61 }
62}
63
David Elliott2179d372006-01-18 17:43:08 -080064/* Compare names as a sequence of 16-bit unsigned integers */
65int hfsplus_strcmp(const struct hfsplus_unistr *s1,
66 const struct hfsplus_unistr *s2)
67{
68 u16 len1, len2, c1, c2;
69 const hfsplus_unichr *p1, *p2;
70 int len;
71
72 len1 = be16_to_cpu(s1->length);
73 len2 = be16_to_cpu(s2->length);
74 p1 = s1->unicode;
75 p2 = s2->unicode;
76
77 for (len = min(len1, len2); len > 0; len--) {
78 c1 = be16_to_cpu(*p1);
79 c2 = be16_to_cpu(*p2);
80 if (c1 != c2)
81 return c1 < c2 ? -1 : 1;
82 p1++;
83 p2++;
84 }
85
86 return len1 < len2 ? -1 :
87 len1 > len2 ? 1 : 0;
88}
89
90
/*
 * Constants for arithmetic composition of Hangul syllables from
 * conjoining jamo (leading consonant L, vowel V, optional trailing
 * consonant T).
 */
enum {
	Hangul_SBase	= 0xac00,	/* base added to a composed syllable index */
	Hangul_LBase	= 0x1100,	/* base subtracted from a leading jamo */
	Hangul_VBase	= 0x1161,	/* base subtracted from a vowel jamo */
	Hangul_TBase	= 0x11a7,	/* base subtracted from a trailing jamo */
	Hangul_SCount	= 11172,
	Hangul_LCount	= 19,
	Hangul_VCount	= 21,
	Hangul_TCount	= 28,
	Hangul_NCount	= Hangul_VCount * Hangul_TCount
};
100
101
102static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
103{
104 int i, s, e;
105
106 s = 1;
107 e = p[1];
108 if (!e || cc < p[s * 2] || cc > p[e * 2])
109 return NULL;
110 do {
111 i = (s + e) / 2;
112 if (cc > p[i * 2])
113 s = i + 1;
114 else if (cc < p[i * 2])
115 e = i - 1;
116 else
117 return hfsplus_compose_table + p[i * 2 + 1];
118 } while (s <= e);
119 return NULL;
120}
121
Anton Salikhmetov2753cc22010-12-16 18:08:38 +0200122int hfsplus_uni2asc(struct super_block *sb,
123 const struct hfsplus_unistr *ustr,
124 char *astr, int *len_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125{
126 const hfsplus_unichr *ip;
Christoph Hellwigdd73a012010-10-01 05:42:59 +0200127 struct nls_table *nls = HFSPLUS_SB(sb)->nls;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128 u8 *op;
129 u16 cc, c0, c1;
130 u16 *ce1, *ce2;
131 int i, len, ustrlen, res, compose;
132
133 op = astr;
134 ip = ustr->unicode;
135 ustrlen = be16_to_cpu(ustr->length);
136 len = *len_p;
137 ce1 = NULL;
Christoph Hellwig84adede2010-10-01 05:45:20 +0200138 compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139
140 while (ustrlen > 0) {
141 c0 = be16_to_cpu(*ip++);
142 ustrlen--;
143 /* search for single decomposed char */
144 if (likely(compose))
145 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200146 if (ce1)
147 cc = ce1[0];
148 else
149 cc = 0;
150 if (cc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151 /* start of a possibly decomposed Hangul char */
152 if (cc != 0xffff)
153 goto done;
154 if (!ustrlen)
155 goto same;
156 c1 = be16_to_cpu(*ip) - Hangul_VBase;
157 if (c1 < Hangul_VCount) {
158 /* compose the Hangul char */
159 cc = (c0 - Hangul_LBase) * Hangul_VCount;
160 cc = (cc + c1) * Hangul_TCount;
161 cc += Hangul_SBase;
162 ip++;
163 ustrlen--;
164 if (!ustrlen)
165 goto done;
166 c1 = be16_to_cpu(*ip) - Hangul_TBase;
167 if (c1 > 0 && c1 < Hangul_TCount) {
168 cc += c1;
169 ip++;
170 ustrlen--;
171 }
172 goto done;
173 }
174 }
175 while (1) {
176 /* main loop for common case of not composed chars */
177 if (!ustrlen)
178 goto same;
179 c1 = be16_to_cpu(*ip);
180 if (likely(compose))
Anton Salikhmetov2753cc22010-12-16 18:08:38 +0200181 ce1 = hfsplus_compose_lookup(
182 hfsplus_compose_table, c1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 if (ce1)
184 break;
185 switch (c0) {
186 case 0:
187 c0 = 0x2400;
188 break;
189 case '/':
190 c0 = ':';
191 break;
192 }
193 res = nls->uni2char(c0, op, len);
194 if (res < 0) {
195 if (res == -ENAMETOOLONG)
196 goto out;
197 *op = '?';
198 res = 1;
199 }
200 op += res;
201 len -= res;
202 c0 = c1;
203 ip++;
204 ustrlen--;
205 }
206 ce2 = hfsplus_compose_lookup(ce1, c0);
207 if (ce2) {
208 i = 1;
209 while (i < ustrlen) {
Anton Salikhmetov2753cc22010-12-16 18:08:38 +0200210 ce1 = hfsplus_compose_lookup(ce2,
211 be16_to_cpu(ip[i]));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212 if (!ce1)
213 break;
214 i++;
215 ce2 = ce1;
216 }
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200217 cc = ce2[0];
218 if (cc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700219 ip += i;
220 ustrlen -= i;
221 goto done;
222 }
223 }
Anton Salikhmetov20b76432010-12-16 18:08:40 +0200224same:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225 switch (c0) {
226 case 0:
227 cc = 0x2400;
228 break;
229 case '/':
230 cc = ':';
231 break;
232 default:
233 cc = c0;
234 }
Anton Salikhmetov20b76432010-12-16 18:08:40 +0200235done:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236 res = nls->uni2char(cc, op, len);
237 if (res < 0) {
238 if (res == -ENAMETOOLONG)
239 goto out;
240 *op = '?';
241 res = 1;
242 }
243 op += res;
244 len -= res;
245 }
246 res = 0;
247out:
248 *len_p = (char *)op - astr;
249 return res;
250}
251
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700252/*
253 * Convert one or more ASCII characters into a single unicode character.
254 * Returns the number of ASCII characters corresponding to the unicode char.
255 */
256static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
257 wchar_t *uc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258{
Christoph Hellwigdd73a012010-10-01 05:42:59 +0200259 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700260 if (size <= 0) {
261 *uc = '?';
262 size = 1;
263 }
264 switch (*uc) {
265 case 0x2400:
266 *uc = 0;
267 break;
268 case ':':
269 *uc = '/';
270 break;
271 }
272 return size;
273}
274
275/* Decomposes a single unicode character. */
276static inline u16 *decompose_unichar(wchar_t uc, int *size)
277{
278 int off;
279
280 off = hfsplus_decompose_table[(uc >> 12) & 0xf];
281 if (off == 0 || off == 0xffff)
282 return NULL;
283
284 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
285 if (!off)
286 return NULL;
287
288 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
289 if (!off)
290 return NULL;
291
292 off = hfsplus_decompose_table[off + (uc & 0xf)];
293 *size = off & 3;
294 if (*size == 0)
295 return NULL;
296 return hfsplus_decompose_table + (off / 4);
297}
298
Vyacheslav Dubeyko324ef392013-02-27 17:03:04 -0800299int hfsplus_asc2uni(struct super_block *sb,
300 struct hfsplus_unistr *ustr, int max_unistr_len,
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700301 const char *astr, int len)
302{
303 int size, dsize, decompose;
304 u16 *dstr, outlen = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305 wchar_t c;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306
Christoph Hellwig84adede2010-10-01 05:45:20 +0200307 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
Vyacheslav Dubeyko324ef392013-02-27 17:03:04 -0800308 while (outlen < max_unistr_len && len > 0) {
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700309 size = asc2unichar(sb, astr, len, &c);
310
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200311 if (decompose)
312 dstr = decompose_unichar(c, &dsize);
313 else
314 dstr = NULL;
315 if (dstr) {
Vyacheslav Dubeyko324ef392013-02-27 17:03:04 -0800316 if (outlen + dsize > max_unistr_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 break;
318 do {
Duane Griffin1e96b7c2007-07-15 23:41:22 -0700319 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
320 } while (--dsize > 0);
321 } else
322 ustr->unicode[outlen++] = cpu_to_be16(c);
323
324 astr += size;
325 len -= size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326 }
327 ustr->length = cpu_to_be16(outlen);
328 if (len > 0)
329 return -ENAMETOOLONG;
330 return 0;
331}
Duane Griffind45bce82007-07-15 23:41:23 -0700332
333/*
334 * Hash a string to an integer as appropriate for the HFS+ filesystem.
335 * Composed unicode characters are decomposed and case-folding is performed
336 * if the appropriate bits are (un)set on the superblock.
337 */
Linus Torvaldsda53be12013-05-21 15:22:44 -0700338int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
Duane Griffind45bce82007-07-15 23:41:23 -0700339{
340 struct super_block *sb = dentry->d_sb;
341 const char *astr;
342 const u16 *dstr;
Andrew Morton8aa84ab2008-02-08 04:20:52 -0800343 int casefold, decompose, size, len;
Duane Griffind45bce82007-07-15 23:41:23 -0700344 unsigned long hash;
345 wchar_t c;
346 u16 c2;
347
Christoph Hellwig84adede2010-10-01 05:45:20 +0200348 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
349 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
Linus Torvalds8387ff22016-06-10 07:51:30 -0700350 hash = init_name_hash(dentry);
Duane Griffind45bce82007-07-15 23:41:23 -0700351 astr = str->name;
352 len = str->len;
353 while (len > 0) {
Andrew Morton8aa84ab2008-02-08 04:20:52 -0800354 int uninitialized_var(dsize);
Duane Griffind45bce82007-07-15 23:41:23 -0700355 size = asc2unichar(sb, astr, len, &c);
356 astr += size;
357 len -= size;
358
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200359 if (decompose)
360 dstr = decompose_unichar(c, &dsize);
361 else
362 dstr = NULL;
363 if (dstr) {
Duane Griffind45bce82007-07-15 23:41:23 -0700364 do {
365 c2 = *dstr++;
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200366 if (casefold)
367 c2 = case_fold(c2);
368 if (!casefold || c2)
Duane Griffind45bce82007-07-15 23:41:23 -0700369 hash = partial_name_hash(c2, hash);
370 } while (--dsize > 0);
371 } else {
372 c2 = c;
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200373 if (casefold)
374 c2 = case_fold(c2);
375 if (!casefold || c2)
Duane Griffind45bce82007-07-15 23:41:23 -0700376 hash = partial_name_hash(c2, hash);
377 }
378 }
379 str->hash = end_name_hash(hash);
380
381 return 0;
382}
383
384/*
385 * Compare strings with HFS+ filename ordering.
386 * Composed unicode characters are decomposed and case-folding is performed
387 * if the appropriate bits are (un)set on the superblock.
388 */
Al Viro6fa67e72016-07-31 16:37:25 -0400389int hfsplus_compare_dentry(const struct dentry *dentry,
Nick Piggin621e1552011-01-07 17:49:27 +1100390 unsigned int len, const char *str, const struct qstr *name)
Duane Griffind45bce82007-07-15 23:41:23 -0700391{
Al Virod3fe1982016-07-29 18:23:59 -0400392 struct super_block *sb = dentry->d_sb;
Duane Griffind45bce82007-07-15 23:41:23 -0700393 int casefold, decompose, size;
394 int dsize1, dsize2, len1, len2;
395 const u16 *dstr1, *dstr2;
396 const char *astr1, *astr2;
397 u16 c1, c2;
398 wchar_t c;
399
Christoph Hellwig84adede2010-10-01 05:45:20 +0200400 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
401 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
Nick Piggin621e1552011-01-07 17:49:27 +1100402 astr1 = str;
403 len1 = len;
404 astr2 = name->name;
405 len2 = name->len;
Duane Griffind45bce82007-07-15 23:41:23 -0700406 dsize1 = dsize2 = 0;
407 dstr1 = dstr2 = NULL;
408
409 while (len1 > 0 && len2 > 0) {
410 if (!dsize1) {
411 size = asc2unichar(sb, astr1, len1, &c);
412 astr1 += size;
413 len1 -= size;
414
Anton Salikhmetov2753cc22010-12-16 18:08:38 +0200415 if (decompose)
416 dstr1 = decompose_unichar(c, &dsize1);
417 if (!decompose || !dstr1) {
Duane Griffind45bce82007-07-15 23:41:23 -0700418 c1 = c;
419 dstr1 = &c1;
420 dsize1 = 1;
421 }
422 }
423
424 if (!dsize2) {
425 size = asc2unichar(sb, astr2, len2, &c);
426 astr2 += size;
427 len2 -= size;
428
Anton Salikhmetov2753cc22010-12-16 18:08:38 +0200429 if (decompose)
430 dstr2 = decompose_unichar(c, &dsize2);
431 if (!decompose || !dstr2) {
Duane Griffind45bce82007-07-15 23:41:23 -0700432 c2 = c;
433 dstr2 = &c2;
434 dsize2 = 1;
435 }
436 }
437
438 c1 = *dstr1;
439 c2 = *dstr2;
440 if (casefold) {
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200441 c1 = case_fold(c1);
442 if (!c1) {
Duane Griffind45bce82007-07-15 23:41:23 -0700443 dstr1++;
444 dsize1--;
445 continue;
446 }
Anton Salikhmetov2b4f9ca2010-12-16 18:08:42 +0200447 c2 = case_fold(c2);
448 if (!c2) {
Duane Griffind45bce82007-07-15 23:41:23 -0700449 dstr2++;
450 dsize2--;
451 continue;
452 }
453 }
454 if (c1 < c2)
455 return -1;
456 else if (c1 > c2)
457 return 1;
458
459 dstr1++;
460 dsize1--;
461 dstr2++;
462 dsize2--;
463 }
464
465 if (len1 < len2)
466 return -1;
467 if (len1 > len2)
468 return 1;
469 return 0;
470}