/*
 * cifs_unicode:  Unicode kernel case support
 *
 * Function:
 *     Convert a unicode character to upper or lower case using
 *     compressed tables.
 *
 *   Copyright (c) International Business Machines  Corp., 2000,2009
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 *
 * Notes:
 *     These APIs are based on the C library functions.  The semantics
 *     should match the C functions but with expanded size operands.
 *
 *     The upper/lower functions are based on a table created by mkupr.
 *     This is a compressed table of upper and lower case conversion.
 *
 */
Igor Druzhininbf4f1212010-08-20 00:27:12 +040033#ifndef _CIFS_UNICODE_H
34#define _CIFS_UNICODE_H
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <asm/byteorder.h>
37#include <linux/types.h>
38#include <linux/nls.h>
39
40#define UNIUPR_NOLOWER /* Example to not expand lower case tables */
41
/*
 * Windows maps these to the user defined 16 bit Unicode range since they are
 * reserved symbols (along with \ and /), otherwise illegal to store
 * in filenames in NTFS
 *
 * Each value is the ASCII code of the reserved character plus 0xF000.
 * The whole expansion is parenthesized so the macros behave as a single
 * operand in any expression (e.g. as the operand of sizeof).
 * Note that UNI_SLASH remaps the backslash character ('\\').
 */
#define UNI_ASTERISK	((__u16) ('*' + 0xF000))
#define UNI_QUESTION	((__u16) ('?' + 0xF000))
#define UNI_COLON	((__u16) (':' + 0xF000))
#define UNI_GRTRTHAN	((__u16) ('>' + 0xF000))
#define UNI_LESSTHAN	((__u16) ('<' + 0xF000))
#define UNI_PIPE	((__u16) ('|' + 0xF000))
#define UNI_SLASH	((__u16) ('\\' + 0xF000))

/*
 * Just define what we want from uniupr.h.  We don't want to define the tables
 * in each source file.
 */
#ifndef UNICASERANGE_DEFINED
/*
 * One entry of a case-conversion range list.  The conversion helpers in
 * this header add table[uc - start] to a character uc that satisfies
 * start <= uc <= end, and treat an entry with start == 0 as the end of
 * the list.
 */
struct UniCaseRange {
	wchar_t start;		/* first character covered by this range */
	wchar_t end;		/* last character covered (inclusive) */
	signed char *table;	/* signed offsets, indexed by (uc - start) */
};
#endif				/* UNICASERANGE_DEFINED */

#ifndef UNIUPR_NOUPPER
extern signed char CifsUniUpperTable[512];
extern const struct UniCaseRange CifsUniUpperRange[];
#endif				/* UNIUPR_NOUPPER */

#ifndef UNIUPR_NOLOWER
extern signed char CifsUniLowerTable[512];
extern const struct UniCaseRange CifsUniLowerRange[];
#endif				/* UNIUPR_NOLOWER */

/*
 * Conversion routines between UTF-16 (little-endian) and local codepage
 * strings.  They are only declared here, and only for kernel builds.
 */
#ifdef __KERNEL__
int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
		    const struct nls_table *codepage, bool mapchar);
int cifs_utf16_bytes(const __le16 *from, int maxbytes,
		     const struct nls_table *codepage);
int cifs_strtoUTF16(__le16 *, const char *, int, const struct nls_table *);
char *cifs_strndup_from_utf16(const char *src, const int maxlen,
			      const bool is_unicode,
			      const struct nls_table *codepage);
extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen,
			      const struct nls_table *cp, int mapChars);
#ifdef CONFIG_CIFS_SMB2
extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen,
				     int *utf16_len, const struct nls_table *cp,
				     int remap);
#endif /* CONFIG_CIFS_SMB2 */
#endif /* __KERNEL__ */

/*
 * UniStrcat:  Concatenate the second string to the first
 *
 * Both strings must be null terminated.  No bounds checking is done, so
 * ucs1 must have room for the combined string and its terminator.
 *
 * Returns:
 *     Address of the first string
 */
static inline wchar_t *
UniStrcat(wchar_t *ucs1, const wchar_t *ucs2)
{
	wchar_t *anchor = ucs1;	/* save a pointer to start of ucs1 */

	while (*ucs1)		/* To end of first string */
		ucs1++;
	while ((*ucs1++ = *ucs2++))	/* copy string 2 over, null included */
		;
	return anchor;
}

/*
 * UniStrchr:  Find a character in a string
 *
 * Searching for the null character returns the address of the string's
 * terminator.
 *
 * Returns:
 *     Address of first occurrence of character in string
 *     or NULL if the character is not in the string
 */
static inline wchar_t *
UniStrchr(const wchar_t *ucs, wchar_t uc)
{
	/* Stop at the first match or at the terminator, whichever is first */
	while ((*ucs != uc) && *ucs)
		ucs++;

	if (*ucs == uc)
		return (wchar_t *) ucs;
	return NULL;
}

/*
 * UniStrcmp:  Compare two strings
 *
 * The result is the difference between the first pair of characters that
 * differ (or zero when both strings end together).
 *
 * Returns:
 *     < 0:  First string is less than second
 *     = 0:  Strings are equal
 *     > 0:  First string is greater than second
 */
static inline int
UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2)
{
	/* Advance while the characters agree and ucs1 has not ended */
	while ((*ucs1 == *ucs2) && *ucs1) {
		ucs1++;
		ucs2++;
	}
	return (int) *ucs1 - (int) *ucs2;
}

/*
 * UniStrcpy:  Copy a string
 *
 * Copies ucs2, including its null terminator, into ucs1.  No bounds
 * checking is done; ucs1 must be large enough.
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *
UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
{
	wchar_t *anchor = ucs1;	/* save the start of result string */

	while ((*ucs1++ = *ucs2++))
		;
	return anchor;
}

/*
 * UniStrlen:  Return the length of a string (in 16 bit Unicode chars not bytes)
 *
 * The terminating null is not counted.
 */
static inline size_t
UniStrlen(const wchar_t *ucs1)
{
	size_t len = 0;	/* same type as the return value: no int overflow */

	while (ucs1[len])
		len++;
	return len;
}

/*
 * UniStrnlen:  Return the length (in 16 bit Unicode chars not bytes) of a
 *		string (length limited)
 *
 * At most maxlen characters are examined.  A maxlen of zero (or less)
 * yields 0 without touching the string.
 */
static inline size_t
UniStrnlen(const wchar_t *ucs1, int maxlen)
{
	size_t len = 0;

	if (maxlen <= 0)	/* nothing may be examined */
		return 0;

	while (len < (size_t) maxlen && ucs1[len])
		len++;
	return len;
}

/*
 * UniStrncat:  Concatenate length limited string
 *
 * Appends at most n characters of ucs2 to ucs1 and always null terminates
 * the result, so ucs1 needs room for up to n extra characters plus the
 * terminator.
 *
 * Returns:
 *     Address of the first string
 */
static inline wchar_t *
UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	wchar_t *anchor = ucs1;	/* save pointer to string 1 */

	while (*ucs1)		/* point to null terminator of s1 */
		ucs1++;
	while (n-- && (*ucs1 = *ucs2)) {	/* copy s2 after s1 */
		ucs1++;
		ucs2++;
	}
	*ucs1 = 0;		/* Null terminate the result */
	return anchor;
}

/*
 * UniStrncmp:  Compare length limited string
 *
 * Compares at most n characters.
 *
 * Returns:
 *     < 0, 0 or > 0 as the compared part of ucs1 is less than, equal to
 *     or greater than that of ucs2; always 0 when n is 0
 */
static inline int
UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	if (!n)
		return 0;	/* Null strings are equal */
	/* --n stops the walk on the n-th character without stepping past it */
	while ((*ucs1 == *ucs2) && *ucs1 && --n) {
		ucs1++;
		ucs2++;
	}
	return (int) *ucs1 - (int) *ucs2;
}

/*
 * UniStrncmp_le:  Compare length limited string - native to little-endian
 *
 * ucs1 is read as-is; every character of ucs2 is passed through
 * __le16_to_cpu() before it is compared.  At most n characters are
 * compared.
 *
 * Returns:
 *     < 0, 0 or > 0 as for UniStrncmp; always 0 when n is 0
 */
static inline int
UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	if (!n)
		return 0;	/* Null strings are equal */
	/* --n stops the walk on the n-th character without stepping past it */
	while ((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) {
		ucs1++;
		ucs2++;
	}
	return (int) *ucs1 - (int) __le16_to_cpu(*ucs2);
}

/*
 * UniStrncpy:  Copy length limited string with pad
 *
 * Exactly n characters of ucs1 are written: the characters of ucs2 up to
 * (not including) its terminator, then nulls for the remainder.  When ucs2
 * holds n or more characters the result is NOT null terminated.
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *
UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t i = 0;

	while (i < n && ucs2[i]) {	/* Copy the strings */
		ucs1[i] = ucs2[i];
		i++;
	}
	while (i < n)			/* Pad with nulls */
		ucs1[i++] = 0;
	return ucs1;
}

/*
 * UniStrncpy_le:  Copy length limited string with pad to little-endian
 *
 * Exactly n characters of ucs1 are written: the characters of ucs2 up to
 * (not including) its terminator, each passed through __le16_to_cpu(),
 * then nulls for the remainder.  When ucs2 holds n or more characters the
 * result is NOT null terminated.
 *
 * NOTE(review): the summary says the copy converts *to* little-endian while
 * the code calls __le16_to_cpu(); confirm which helper expresses the intent.
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *
UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t i = 0;

	while (i < n && ucs2[i]) {	/* Copy the strings */
		ucs1[i] = __le16_to_cpu(ucs2[i]);
		i++;
	}
	while (i < n)			/* Pad with nulls */
		ucs1[i++] = 0;
	return ucs1;
}

/*
 * UniStrstr:  Find a string in a string
 *
 * An empty ucs2 matches at the start of ucs1.
 *
 * Returns:
 *     Address of first match found
 *     NULL if no matching string is found
 */
static inline wchar_t *
UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
{
	const wchar_t *anchor1 = ucs1;	/* start of the current candidate */
	const wchar_t *anchor2 = ucs2;	/* start of the pattern */

	while (*ucs1) {
		if (*ucs1 == *ucs2) {
			/* Partial match found */
			ucs1++;
			ucs2++;
		} else {
			if (!*ucs2)	/* Match found */
				return (wchar_t *) anchor1;
			ucs1 = ++anchor1;	/* No match: retry one further on */
			ucs2 = anchor2;
		}
	}

	if (!*ucs2)		/* Both end together */
		return (wchar_t *) anchor1;	/* Match found */
	return NULL;		/* No match */
}

302#ifndef UNIUPR_NOUPPER
303/*
304 * UniToupper: Convert a unicode character to upper case
305 */
306static inline wchar_t
307UniToupper(register wchar_t uc)
308{
309 register const struct UniCaseRange *rp;
310
Steve Frenchad7a2922008-02-07 23:25:02 +0000311 if (uc < sizeof(CifsUniUpperTable)) {
312 /* Latin characters */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 return uc + CifsUniUpperTable[uc]; /* Use base tables */
314 } else {
315 rp = CifsUniUpperRange; /* Use range tables */
316 while (rp->start) {
317 if (uc < rp->start) /* Before start of range */
318 return uc; /* Uppercase = input */
319 if (uc <= rp->end) /* In range */
320 return uc + rp->table[uc - rp->start];
321 rp++; /* Try next range */
322 }
323 }
324 return uc; /* Past last range */
325}
326
327/*
328 * UniStrupr: Upper case a unicode string
329 */
330static inline wchar_t *
Steve French50c2f752007-07-13 00:33:32 +0000331UniStrupr(register wchar_t *upin)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332{
333 register wchar_t *up;
334
335 up = upin;
336 while (*up) { /* For all characters */
337 *up = UniToupper(*up);
338 up++;
339 }
340 return upin; /* Return input pointer */
341}
342#endif /* UNIUPR_NOUPPER */
343
344#ifndef UNIUPR_NOLOWER
345/*
346 * UniTolower: Convert a unicode character to lower case
347 */
348static inline wchar_t
Igor Druzhininbf4f1212010-08-20 00:27:12 +0400349UniTolower(register wchar_t uc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350{
Igor Druzhininbf4f1212010-08-20 00:27:12 +0400351 register const struct UniCaseRange *rp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352
Igor Druzhininbf4f1212010-08-20 00:27:12 +0400353 if (uc < sizeof(CifsUniLowerTable)) {
Steve Frenchad7a2922008-02-07 23:25:02 +0000354 /* Latin characters */
Igor Druzhininbf4f1212010-08-20 00:27:12 +0400355 return uc + CifsUniLowerTable[uc]; /* Use base tables */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700356 } else {
Igor Druzhininbf4f1212010-08-20 00:27:12 +0400357 rp = CifsUniLowerRange; /* Use range tables */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 while (rp->start) {
359 if (uc < rp->start) /* Before start of range */
360 return uc; /* Uppercase = input */
361 if (uc <= rp->end) /* In range */
362 return uc + rp->table[uc - rp->start];
363 rp++; /* Try next range */
364 }
365 }
366 return uc; /* Past last range */
367}
368
369/*
370 * UniStrlwr: Lower case a unicode string
371 */
372static inline wchar_t *
Steve French50c2f752007-07-13 00:33:32 +0000373UniStrlwr(register wchar_t *upin)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374{
375 register wchar_t *up;
376
377 up = upin;
378 while (*up) { /* For all characters */
379 *up = UniTolower(*up);
380 up++;
381 }
382 return upin; /* Return input pointer */
383}
384
385#endif
Igor Druzhininbf4f1212010-08-20 00:27:12 +0400386
387#endif /* _CIFS_UNICODE_H */