/*
 * cifs_unicode:  Unicode kernel case support
 *
 * Function:
 *     Convert a unicode character to upper or lower case using
 *     compressed tables.
 *
 *   Copyright (c) International Business Machines  Corp., 2000,2009
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 *
 * Notes:
 *     These APIs are based on the C library functions.  The semantics
 *     should match the C functions but with expanded size operands.
 *
 *     The upper/lower functions are based on a table created by mkupr.
 *     This is a compressed table of upper and lower case conversion.
 *
 */
Igor Druzhininbf4f1212010-08-20 00:27:12 +040033#ifndef _CIFS_UNICODE_H
34#define _CIFS_UNICODE_H
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <asm/byteorder.h>
37#include <linux/types.h>
38#include <linux/nls.h>
39
/*
 * Defining UNIUPR_NOLOWER keeps the lower-case tables (and the helpers
 * guarded by the same symbol) out of every file that includes this header.
 */
#define UNIUPR_NOLOWER
41
/*
 * Windows maps these to the user defined 16 bit Unicode range since they are
 * reserved symbols (along with \ and /), otherwise illegal to store
 * in filenames in NTFS.
 *
 * Each value is the ASCII code of the reserved character plus 0xF000.
 * The whole expansion is parenthesized (matching the SFM_* macros) so the
 * cast cannot combine with neighbouring tokens at the point of use.
 *
 * Note: UNI_SLASH is built from the backslash character ('\\'), not '/'.
 */
#define UNI_ASTERISK	((__u16) ('*' + 0xF000))
#define UNI_QUESTION	((__u16) ('?' + 0xF000))
#define UNI_COLON	((__u16) (':' + 0xF000))
#define UNI_GRTRTHAN	((__u16) ('>' + 0xF000))
#define UNI_LESSTHAN	((__u16) ('<' + 0xF000))
#define UNI_PIPE	((__u16) ('|' + 0xF000))
#define UNI_SLASH	((__u16) ('\\' + 0xF000))

54
/*
 * Macs use an older "SFM" mapping of the same reserved symbols.
 * Fortunately it does not conflict (although it almost does) with the
 * 0xF000-offset mapping used for the UNI_* values.
 *
 * Listed here in ascending code point order.
 */
#define SFM_ASTERISK	((__u16) 0xF021)
#define SFM_COLON	((__u16) 0xF022)
#define SFM_LESSTHAN	((__u16) 0xF023)
#define SFM_GRTRTHAN	((__u16) 0xF024)
#define SFM_QUESTION	((__u16) 0xF025)
#define SFM_SLASH	((__u16) 0xF026)
#define SFM_PIPE	((__u16) 0xF027)

67
/*
 * Selects how the seven reserved characters are remapped when one of them
 * is encountered. Only one mechanism may be active at a time; otherwise
 * readdir could return directory entries which we would not be able to open.
 *
 *   NO_MAP_UNI_RSVD  - leave the character untouched
 *   SFM_MAP_UNI_RSVD - remap using the SFM scheme (MAC compatible)
 *   SFU_MAP_UNI_RSVD - remap ala SFU ("mapchars" option)
 */
#define NO_MAP_UNI_RSVD		0
#define SFM_MAP_UNI_RSVD	1
#define SFU_MAP_UNI_RSVD	2
82
/*
 * Only the pieces of uniupr.h that are needed are repeated here, so that
 * the tables themselves are not defined in each source file.
 *
 * struct UniCaseRange describes one range of the case-conversion data:
 * code points from 'start' through 'end' (inclusive) are converted by
 * adding table[code - start] to the code point.
 *
 * NOTE(review): nothing in this header defines UNICASERANGE_DEFINED, so
 * the guard only has an effect if another header sets it - confirm.
 */
#ifndef UNICASERANGE_DEFINED
struct UniCaseRange {
	wchar_t start;		/* first code point covered by the range */
	wchar_t end;		/* last code point covered by the range */
	signed char *table;	/* signed per-character deltas */
};
#endif				/* UNICASERANGE_DEFINED */
93
94#ifndef UNIUPR_NOUPPER
95extern signed char CifsUniUpperTable[512];
96extern const struct UniCaseRange CifsUniUpperRange[];
97#endif /* UNIUPR_NOUPPER */
98
99#ifndef UNIUPR_NOLOWER
Igor Druzhininbf4f1212010-08-20 00:27:12 +0400100extern signed char CifsUniLowerTable[512];
101extern const struct UniCaseRange CifsUniLowerRange[];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102#endif /* UNIUPR_NOLOWER */
103
#ifdef __KERNEL__
/*
 * Forward declaration: without it, a file that includes this header before
 * the one defining struct cifs_sb_info would give the cifs_remap()
 * prototype a struct tag scoped to its own parameter list, which is not
 * compatible with the real type.
 */
struct cifs_sb_info;

/*
 * String conversion entry points used by the rest of the module; their
 * definitions are not part of this header.
 * NOTE(review): map_type / mapChars / remap presumably take one of the
 * *_MAP_UNI_RSVD values defined in this header - confirm against callers.
 */
int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
		    const struct nls_table *cp, int map_type);
int cifs_utf16_bytes(const __le16 *from, int maxbytes,
		     const struct nls_table *codepage);
int cifs_strtoUTF16(__le16 *, const char *, int, const struct nls_table *);
char *cifs_strndup_from_utf16(const char *src, const int maxlen,
			      const bool is_unicode,
			      const struct nls_table *codepage);
extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen,
			      const struct nls_table *cp, int mapChars);
extern int cifs_remap(struct cifs_sb_info *cifs_sb);
#ifdef CONFIG_CIFS_SMB2
extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen,
				     int *utf16_len, const struct nls_table *cp,
				     int remap);
#endif /* CONFIG_CIFS_SMB2 */
#endif /* __KERNEL__ */

/* Declared for all builds (outside the __KERNEL__ section); defined elsewhere. */
wchar_t cifs_toupper(wchar_t in);
124
/*
 * UniStrcat: Append ucs2, terminator included, to the end of ucs1
 *
 * The caller must provide enough room in ucs1 for the combined string.
 *
 * Returns:
 *     The ucs1 pointer that was passed in
 */
static inline wchar_t *
UniStrcat(wchar_t *ucs1, const wchar_t *ucs2)
{
	wchar_t *dst = ucs1;

	/* Advance to the null that ends the first string */
	while (*dst)
		dst++;

	/* Copy the second string; stop once its null has been written */
	do {
		*dst = *ucs2++;
	} while (*dst++);

	return ucs1;
}
141
/*
 * UniStrchr: Locate the first occurrence of a character in a string
 *
 * Searching for 0 yields the address of the terminating null.
 *
 * Returns:
 *     Address of the first matching character,
 *     or NULL if the character does not occur in the string
 */
static inline wchar_t *
UniStrchr(const wchar_t *ucs, wchar_t uc)
{
	for (;; ucs++) {
		if (*ucs == uc)
			return (wchar_t *) ucs;
		if (!*ucs)		/* reached the end without a match */
			return NULL;
	}
}
159
/*
 * UniStrcmp: Compare two null-terminated strings
 *
 * Returns the difference between the first pair of characters that
 * differ (or 0 if the strings end together):
 *     < 0:  ucs1 sorts before ucs2
 *     = 0:  the strings are equal
 *     > 0:  ucs1 sorts after ucs2
 */
static inline int
UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2)
{
	/* Skip the common prefix, stopping at the end of ucs1 */
	for (; *ucs1 && *ucs1 == *ucs2; ucs1++, ucs2++)
		;
	return (int) *ucs1 - (int) *ucs2;
}
177
/*
 * UniStrcpy: Copy ucs2, terminator included, into ucs1
 *
 * Returns:
 *     The ucs1 pointer that was passed in
 */
static inline wchar_t *
UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
{
	wchar_t *dst = ucs1;
	wchar_t ch;

	for (;;) {
		ch = *ucs2++;
		*dst++ = ch;
		if (!ch)		/* the null has been copied too */
			break;
	}
	return ucs1;
}
189
/*
 * UniStrlen: Return the length of a string (in 16 bit Unicode chars not bytes)
 *
 * The terminating null is not counted. The counter has the same type as
 * the return value (size_t), so no signed-to-unsigned conversion is
 * involved however long the string is.
 */
static inline size_t
UniStrlen(const wchar_t *ucs1)
{
	size_t len = 0;

	while (ucs1[len])
		len++;
	return len;
}
202
/*
 * UniStrnlen: Return the length (in 16 bit Unicode chars not bytes) of a
 *		string (length limited)
 *
 * At most maxlen characters are examined. The bound is checked before
 * each character is read, so a maxlen of zero (or less) returns 0 without
 * touching the string.
 *
 * Returns:
 *     Number of characters before the terminating null, capped at maxlen
 */
static inline size_t
UniStrnlen(const wchar_t *ucs1, int maxlen)
{
	int len = 0;

	while (len < maxlen && ucs1[len])
		len++;
	return len;
}
219
/*
 * UniStrncat: Append at most n characters of ucs2 to ucs1
 *
 * The result is always null terminated, so ucs1 needs room for up to
 * n characters plus the terminator beyond its current contents.
 *
 * Returns:
 *     The ucs1 pointer that was passed in
 */
static inline wchar_t *
UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	wchar_t *tail = ucs1;
	size_t i;

	/* Find the null terminator of the first string */
	while (*tail)
		tail++;

	/* Copy until n characters are taken or the source ends */
	for (i = 0; i < n && ucs2[i]; i++)
		tail[i] = ucs2[i];

	tail[i] = 0;
	return ucs1;
}
237
/*
 * UniStrncmp: Compare at most n characters of two strings
 *
 * Returns 0 when n is 0; otherwise the difference between the first pair
 * of characters that differ within the first n characters, or 0 if there
 * is none.
 */
static inline int
UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t i;

	if (n == 0)
		return 0;	/* Null strings are equal */

	/* Stop on the first difference, at the end of ucs1, or at index n - 1 */
	for (i = 0; i < n - 1; i++) {
		if (ucs1[i] != ucs2[i] || !ucs1[i])
			break;
	}
	return (int) ucs1[i] - (int) ucs2[i];
}
252
/*
 * UniStrncmp_le: Compare length limited string - native to little-endian
 *
 * ucs1 is used as-is; every character of ucs2 is passed through
 * __le16_to_cpu() before it is compared. At most n characters are
 * examined and n == 0 compares equal.
 */
static inline int
UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t i;

	if (n == 0)
		return 0;	/* Null strings are equal */

	/* Stop on the first difference, at the end of ucs1, or at index n - 1 */
	for (i = 0; i < n - 1; i++) {
		if (ucs1[i] != __le16_to_cpu(ucs2[i]) || !ucs1[i])
			break;
	}
	return (int) ucs1[i] - (int) __le16_to_cpu(ucs2[i]);
}
267
/*
 * UniStrncpy: Copy length limited string with pad
 *
 * Exactly n characters of ucs1 are written: the characters of ucs2 up to
 * its terminator (or the first n of them), followed by nulls for the
 * remainder. If ucs2 holds n or more characters, no terminating null is
 * stored.
 *
 * Returns:
 *     The ucs1 pointer that was passed in
 */
static inline wchar_t *
UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t i = 0;

	for (; i < n && ucs2[i]; i++)	/* Copy the string */
		ucs1[i] = ucs2[i];
	for (; i < n; i++)		/* Pad with nulls */
		ucs1[i] = 0;
	return ucs1;
}
284
/*
 * UniStrncpy_le: Copy length limited string with pad to little-endian
 *
 * Works like UniStrncpy, except that each copied character goes through
 * __le16_to_cpu() on its way into ucs1. Exactly n characters are written;
 * the tail is padded with nulls.
 *
 * NOTE(review): the conversion macro is the le16-to-cpu direction although
 * the destination is described as little-endian; presumably equivalent
 * because a 16-bit byte swap is its own inverse - confirm.
 *
 * Returns:
 *     The ucs1 pointer that was passed in
 */
static inline wchar_t *
UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t i = 0;

	for (; i < n && ucs2[i]; i++)	/* Copy the string */
		ucs1[i] = __le16_to_cpu(ucs2[i]);
	for (; i < n; i++)		/* Pad with nulls */
		ucs1[i] = 0;
	return ucs1;
}
301
/*
 * UniStrstr: Find the first occurrence of ucs2 inside ucs1
 *
 * An empty ucs2 matches at the very start of ucs1.
 *
 * Returns:
 *     Address of the first match found
 *     NULL if no matching string is found
 */
static inline wchar_t *
UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
{
	const wchar_t *start;

	for (start = ucs1; ; start++) {
		const wchar_t *hay = start;
		const wchar_t *pat = ucs2;

		/* Consume as much of the pattern as matches at this start */
		while (*pat && *hay == *pat) {
			hay++;
			pat++;
		}

		if (!*pat)		/* whole pattern matched */
			return (wchar_t *) start;
		if (!*hay)		/* ran out of text: no later start can match */
			return NULL;
	}
}
332
333#ifndef UNIUPR_NOUPPER
334/*
335 * UniToupper: Convert a unicode character to upper case
336 */
337static inline wchar_t
338UniToupper(register wchar_t uc)
339{
340 register const struct UniCaseRange *rp;
341
Steve Frenchad7a2922008-02-07 23:25:02 +0000342 if (uc < sizeof(CifsUniUpperTable)) {
343 /* Latin characters */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344 return uc + CifsUniUpperTable[uc]; /* Use base tables */
345 } else {
346 rp = CifsUniUpperRange; /* Use range tables */
347 while (rp->start) {
348 if (uc < rp->start) /* Before start of range */
349 return uc; /* Uppercase = input */
350 if (uc <= rp->end) /* In range */
351 return uc + rp->table[uc - rp->start];
352 rp++; /* Try next range */
353 }
354 }
355 return uc; /* Past last range */
356}
357
358/*
359 * UniStrupr: Upper case a unicode string
360 */
Steve Frenchfdf96a92013-06-25 14:03:16 -0500361static inline __le16 *
362UniStrupr(register __le16 *upin)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363{
Steve Frenchfdf96a92013-06-25 14:03:16 -0500364 register __le16 *up;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365
366 up = upin;
367 while (*up) { /* For all characters */
Steve Frenchfdf96a92013-06-25 14:03:16 -0500368 *up = cpu_to_le16(UniToupper(le16_to_cpu(*up)));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 up++;
370 }
371 return upin; /* Return input pointer */
372}
373#endif /* UNIUPR_NOUPPER */
374
375#ifndef UNIUPR_NOLOWER
376/*
377 * UniTolower: Convert a unicode character to lower case
378 */
379static inline wchar_t
Igor Druzhininbf4f1212010-08-20 00:27:12 +0400380UniTolower(register wchar_t uc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381{
Igor Druzhininbf4f1212010-08-20 00:27:12 +0400382 register const struct UniCaseRange *rp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383
Igor Druzhininbf4f1212010-08-20 00:27:12 +0400384 if (uc < sizeof(CifsUniLowerTable)) {
Steve Frenchad7a2922008-02-07 23:25:02 +0000385 /* Latin characters */
Igor Druzhininbf4f1212010-08-20 00:27:12 +0400386 return uc + CifsUniLowerTable[uc]; /* Use base tables */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 } else {
Igor Druzhininbf4f1212010-08-20 00:27:12 +0400388 rp = CifsUniLowerRange; /* Use range tables */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389 while (rp->start) {
390 if (uc < rp->start) /* Before start of range */
391 return uc; /* Uppercase = input */
392 if (uc <= rp->end) /* In range */
393 return uc + rp->table[uc - rp->start];
394 rp++; /* Try next range */
395 }
396 }
397 return uc; /* Past last range */
398}
399
/*
 * UniStrlwr: Lower case a unicode string in place
 *
 * Every character up to (not including) the terminating null is replaced
 * by the result of UniTolower().
 *
 * Returns:
 *     The upin pointer that was passed in
 */
static inline wchar_t *
UniStrlwr(register wchar_t *upin)
{
	wchar_t *cur;

	for (cur = upin; *cur; cur++)
		*cur = UniTolower(*cur);
	return upin;
}

415
416#endif
Igor Druzhininbf4f1212010-08-20 00:27:12 +0400417
418#endif /* _CIFS_UNICODE_H */