blob: 4c81a80d62b4ff9f29654ac1669345789887260b [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1998-2005 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26/*
27 * Pathname canonicalization for Win32 file systems
28 */
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <ctype.h>
34#include <assert.h>
35#include <sys/stat.h>
36
37#include <windows.h>
38#include <winbase.h>
39#include <errno.h>
40#include "io_util_md.h"
41
42#undef DEBUG_PATH /* Define this to debug path code */
43
44#define isfilesep(c) ((c) == '/' || (c) == '\\')
45#define wisfilesep(c) ((c) == L'/' || (c) == L'\\')
46#define islb(c) (IsDBCSLeadByte((BYTE)(c)))
47
48
/* Copy the bytes in [src, send) into the buffer that starts at dst, never
   writing at or beyond dend.  If first is not '\0', that byte is stored
   ahead of the copied bytes.  Returns the position just past the last byte
   written, or NULL (with errno set to ENAMETOOLONG) if the output would
   not fit. */

static char *
cp(char *dst, char *dend, char first, char *src, char *send)
{
    char *out = dst;
    char *in;

    if (first != '\0') {
        if (out >= dend) {
            errno = ENAMETOOLONG;
            return NULL;
        }
        *out++ = first;
    }

    /* Reject the request up front instead of copying a truncated piece */
    if (send - src > dend - out) {
        errno = ENAMETOOLONG;
        return NULL;
    }

    for (in = src; in < send; in++) {
        *out++ = *in;
    }
    return out;
}
74
75/* Wide character version of cp */
76
77static WCHAR*
78wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
79{
80 WCHAR *p = src, *q = dst;
81 if (first != L'\0') {
82 if (q < dend) {
83 *q++ = first;
84 } else {
85 errno = ENAMETOOLONG;
86 return NULL;
87 }
88 }
89 if (send - p > dend - q) {
90 errno = ENAMETOOLONG;
91 return NULL;
92 }
93 while (p < send)
94 *q++ = *p++;
95 return q;
96}
97
98
/* Scan forward from start for the first '\\'.  Returns a pointer to that
   byte, or to the terminating null byte if the string contains no '\\'.
   A byte flagged by islb() that is followed by another byte is stepped
   over as a two-byte unit, so the second byte of such a pair is never
   tested against '\\'. */

static char *
nextsep(char *start)
{
    char *cur;

    for (cur = start; *cur != '\0' && *cur != '\\'; ) {
        if (islb(*cur) && cur[1] != '\0') {
            cur += 2;
        } else {
            cur += 1;
        }
    }
    return cur;
}
112
113/* Wide character version of nextsep */
114
115static WCHAR *
116wnextsep(WCHAR *start)
117{
118 WCHAR *p = start;
119 int c;
120 while ((c = *p) && (c != L'\\'))
121 p++;
122 return p;
123}
124
/* Returns 1 if the string contains a wildcard character ('*' or '?'),
   otherwise 0.  A byte flagged by islb() that is followed by another byte
   is stepped over as a two-byte unit, so the second byte of such a pair is
   never tested. */

static int
wild(char *start)
{
    char *cur;

    for (cur = start; *cur != '\0'; ) {
        if (*cur == '*' || *cur == '?') {
            return 1;
        }
        if (islb(*cur) && cur[1] != '\0') {
            cur += 2;
        } else {
            cur += 1;
        }
    }
    return 0;
}
138
139/* Wide character version of wild */
140
141static int
142wwild(WCHAR *start)
143{
144 WCHAR *p = start;
145 int c;
146 while (c = *p) {
147 if ((c == L'*') || (c == L'?'))
148 return 1;
149 p++;
150 }
151 return 0;
152}
153
/* Returns 1 if the string contains a prohibited combination of dots,
   otherwise 0.  In the canonicalized form no path element may end with a
   dot, i.e. a run of dots may not be followed directly by '\\' or by the
   end of the string.
   Allowed canonical paths:    c:\xa...dksd\..ksa\.lk    c:\...a\.b\cd..x.x
   Prohibited canonical paths: c:\..\x  c:\x.\d c:\...
*/
static int
dots(char *start)
{
    char *cur = start;

    while (*cur != '\0') {
        cur = strchr(cur, '.');         /* find the next occurrence of '.' */
        if (cur == NULL) {
            return 0;                   /* no more dots */
        }
        do {                            /* step past the whole run of dots */
            cur++;
        } while (*cur == '.');
        if (*cur == '\0' || *cur == '\\') {
            return 1;                   /* path element ends with a dot */
        }
        cur++;                          /* skip the character after the run */
    }
    return 0;                           /* nothing prohibited was found */
}
176
177/* Wide character version of dots */
178static int
179wdots(WCHAR *start)
180{
181 WCHAR *p = start;
182 while (*p) {
183 if ((p = wcschr(p, L'.')) == NULL) // find next occurence of '.'
184 return 0; // no more dots
185 p++; // next char
186 while ((*p) == L'.') // go to the end of dots
187 p++;
188 if (*p && (*p != L'\\')) // path element does not end with a dot
189 p++; // go to the next char
190 else
191 return 1; // path element does end with a dot - prohibited
192 }
193 return 0; // no prohibited combinations of dots found
194}
195
196/* If the lookup of a particular prefix fails because the file does not exist,
197 because it is of the wrong type, because access is denied, or because the
198 network is unreachable then canonicalization does not fail, it terminates
199 successfully after copying the rest of the original path to the result path.
200 Other I/O errors cause an error return.
201*/
202
203int
204lastErrorReportable()
205{
206 DWORD errval = GetLastError();
207 if ((errval == ERROR_FILE_NOT_FOUND)
208 || (errval == ERROR_DIRECTORY)
209 || (errval == ERROR_PATH_NOT_FOUND)
210 || (errval == ERROR_BAD_NETPATH)
211 || (errval == ERROR_BAD_NET_NAME)
212 || (errval == ERROR_ACCESS_DENIED)
213 || (errval == ERROR_NETWORK_UNREACHABLE)
214 || (errval == ERROR_NETWORK_ACCESS_DENIED)) {
215 return 0;
216 }
217
218#ifdef DEBUG_PATH
219 jio_fprintf(stderr, "canonicalize: errval %d\n", errval);
220#endif
221 return 1;
222}
223
224/* Convert a pathname to canonical form. The input orig_path is assumed to
225 have been converted to native form already, via JVM_NativePath(). This is
226 necessary because _fullpath() rejects duplicate separator characters on
227 Win95, though it accepts them on NT. */
228
229int
230canonicalize(char *orig_path, char *result, int size)
231{
232 WIN32_FIND_DATA fd;
233 HANDLE h;
234 char path[1024]; /* Working copy of path */
235 char *src, *dst, *dend;
236
237 /* Reject paths that contain wildcards */
238 if (wild(orig_path)) {
239 errno = EINVAL;
240 return -1;
241 }
242
243 /* Collapse instances of "foo\.." and ensure absoluteness. Note that
244 contrary to the documentation, the _fullpath procedure does not require
245 the drive to be available. It also does not reliably change all
246 occurrences of '/' to '\\' on Win95, so now JVM_NativePath does that. */
247 if(!_fullpath(path, orig_path, sizeof(path))) {
248 return -1;
249 }
250
251 /* Correction for Win95: _fullpath may leave a trailing "\\"
252 on a UNC pathname */
253 if ((path[0] == '\\') && (path[1] == '\\')) {
254 char *p = path + strlen(path);
255 if ((p[-1] == '\\') && !islb(p[-2])) {
256 p[-1] = '\0';
257 }
258 }
259
260 if (dots(path)) /* Check for prohibited combinations of dots */
261 return -1;
262
263 src = path; /* Start scanning here */
264 dst = result; /* Place results here */
265 dend = dst + size; /* Don't go to or past here */
266
267 /* Copy prefix, assuming path is absolute */
268 if (isalpha(src[0]) && (src[1] == ':') && (src[2] == '\\')) {
269 /* Drive specifier */
270 *src = toupper(*src); /* Canonicalize drive letter */
271 if (!(dst = cp(dst, dend, '\0', src, src + 2))) {
272 return -1;
273 }
274 src += 2;
275 } else if ((src[0] == '\\') && (src[1] == '\\')) {
276 /* UNC pathname */
277 char *p;
278 p = nextsep(src + 2); /* Skip past host name */
279 if (!*p) {
280 /* A UNC pathname must begin with "\\\\host\\share",
281 so reject this path as invalid if there is no share name */
282 errno = EINVAL;
283 return -1;
284 }
285 p = nextsep(p + 1); /* Skip past share name */
286 if (!(dst = cp(dst, dend, '\0', src, p))) {
287 return -1;
288 }
289 src = p;
290 } else {
291 /* Invalid path */
292 errno = EINVAL;
293 return -1;
294 }
295
296 /* Windows 95/98/Me bug - FindFirstFile fails on network mounted drives */
297 /* for root pathes like "E:\" . If the path has this form, we should */
298 /* simply return it, it is already canonicalized. */
299 if (strlen(path) == 3 && path[1] == ':' && path[2] == '\\') {
300 /* At this point we have already copied the drive specifier ("z:")*/
301 /* so we need to copy "\" and the null character. */
302 result[2] = '\\';
303 result[3] = '\0';
304 return 0;
305 }
306
307 /* At this point we have copied either a drive specifier ("z:") or a UNC
308 prefix ("\\\\host\\share") to the result buffer, and src points to the
309 first byte of the remainder of the path. We now scan through the rest
310 of the path, looking up each prefix in order to find the true name of
311 the last element of each prefix, thereby computing the full true name of
312 the original path. */
313 while (*src) {
314 char *p = nextsep(src + 1); /* Find next separator */
315 char c = *p;
316 assert(*src == '\\'); /* Invariant */
317 *p = '\0'; /* Temporarily clear separator */
318 h = FindFirstFile(path, &fd); /* Look up prefix */
319 *p = c; /* Restore separator */
320 if (h != INVALID_HANDLE_VALUE) {
321 /* Lookup succeeded; append true name to result and continue */
322 FindClose(h);
323 if (!(dst = cp(dst, dend, '\\',
324 fd.cFileName,
325 fd.cFileName + strlen(fd.cFileName)))) {
326 return -1;
327 }
328 src = p;
329 continue;
330 } else {
331 if (!lastErrorReportable()) {
332 if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
333 return -1;
334 }
335 break;
336 } else {
337 return -1;
338 }
339 }
340 }
341
342 if (dst >= dend) {
343 errno = ENAMETOOLONG;
344 return -1;
345 }
346 *dst = '\0';
347 return 0;
348
349}
350
351
352/* Convert a pathname to canonical form. The input prefix is assumed
353 to be in canonical form already, and the trailing filename must not
354 contain any wildcard, dot/double dot, or other "tricky" characters
355 that are rejected by the canonicalize() routine above. This
356 routine is present to allow the canonicalization prefix cache to be
357 used while still returning canonical names with the correct
358 capitalization. */
359
360int
361canonicalizeWithPrefix(char* canonicalPrefix, char* pathWithCanonicalPrefix, char *result, int size)
362{
363 WIN32_FIND_DATA fd;
364 HANDLE h;
365 char *src, *dst, *dend;
366
367 src = pathWithCanonicalPrefix;
368 dst = result; /* Place results here */
369 dend = dst + size; /* Don't go to or past here */
370
371 h = FindFirstFile(pathWithCanonicalPrefix, &fd); /* Look up file */
372 if (h != INVALID_HANDLE_VALUE) {
373 /* Lookup succeeded; concatenate true name to prefix */
374 FindClose(h);
375 if (!(dst = cp(dst, dend, '\0',
376 canonicalPrefix,
377 canonicalPrefix + strlen(canonicalPrefix)))) {
378 return -1;
379 }
380 if (!(dst = cp(dst, dend, '\\',
381 fd.cFileName,
382 fd.cFileName + strlen(fd.cFileName)))) {
383 return -1;
384 }
385 } else {
386 if (!lastErrorReportable()) {
387 if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
388 return -1;
389 }
390 } else {
391 return -1;
392 }
393 }
394
395 if (dst >= dend) {
396 errno = ENAMETOOLONG;
397 return -1;
398 }
399 *dst = '\0';
400 return 0;
401}
402
403
404/* Wide character version of canonicalize. Size is a wide-character size. */
405
406int
407wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
408{
409 WIN32_FIND_DATAW fd;
410 HANDLE h;
411 WCHAR *path; /* Working copy of path */
412 WCHAR *src, *dst, *dend, c;
413
414 /* Reject paths that contain wildcards */
415 if (wwild(orig_path)) {
416 errno = EINVAL;
417 return -1;
418 }
419
420 if ((path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
421 return -1;
422
423 /* Collapse instances of "foo\.." and ensure absoluteness. Note that
424 contrary to the documentation, the _fullpath procedure does not require
425 the drive to be available. */
426 if(!_wfullpath(path, orig_path, size)) {
427 goto err;
428 }
429
430 if (wdots(path)) /* Check for prohibited combinations of dots */
431 goto err;
432
433 src = path; /* Start scanning here */
434 dst = result; /* Place results here */
435 dend = dst + size; /* Don't go to or past here */
436
437 /* Copy prefix, assuming path is absolute */
438 c = src[0];
439 if (((c <= L'z' && c >= L'a') || (c <= L'Z' && c >= L'A'))
440 && (src[1] == L':') && (src[2] == L'\\')) {
441 /* Drive specifier */
442 *src = towupper(*src); /* Canonicalize drive letter */
443 if (!(dst = wcp(dst, dend, L'\0', src, src + 2))) {
444 goto err;
445 }
446
447 src += 2;
448 } else if ((src[0] == L'\\') && (src[1] == L'\\')) {
449 /* UNC pathname */
450 WCHAR *p;
451 p = wnextsep(src + 2); /* Skip past host name */
452 if (!*p) {
453 /* A UNC pathname must begin with "\\\\host\\share",
454 so reject this path as invalid if there is no share name */
455 errno = EINVAL;
456 goto err;
457 }
458 p = wnextsep(p + 1); /* Skip past share name */
459 if (!(dst = wcp(dst, dend, L'\0', src, p)))
460 goto err;
461 src = p;
462 } else {
463 /* Invalid path */
464 errno = EINVAL;
465 goto err;
466 }
467 /* At this point we have copied either a drive specifier ("z:") or a UNC
468 prefix ("\\\\host\\share") to the result buffer, and src points to the
469 first byte of the remainder of the path. We now scan through the rest
470 of the path, looking up each prefix in order to find the true name of
471 the last element of each prefix, thereby computing the full true name of
472 the original path. */
473 while (*src) {
474 WCHAR *p = wnextsep(src + 1); /* Find next separator */
475 WCHAR c = *p;
476 WCHAR *pathbuf;
477 int pathlen;
478
479 assert(*src == L'\\'); /* Invariant */
480 *p = L'\0'; /* Temporarily clear separator */
481
482 if ((pathlen = wcslen(path)) > MAX_PATH - 1) {
483 pathbuf = getPrefixed(path, pathlen);
484 h = FindFirstFileW(pathbuf, &fd); /* Look up prefix */
485 free(pathbuf);
486 } else
487 h = FindFirstFileW(path, &fd); /* Look up prefix */
488
489 *p = c; /* Restore separator */
490 if (h != INVALID_HANDLE_VALUE) {
491 /* Lookup succeeded; append true name to result and continue */
492 FindClose(h);
493 if (!(dst = wcp(dst, dend, L'\\', fd.cFileName,
494 fd.cFileName + wcslen(fd.cFileName)))){
495 goto err;
496 }
497 src = p;
498 continue;
499 } else {
500 if (!lastErrorReportable()) {
501 if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))){
502 goto err;
503 }
504 break;
505 } else {
506 goto err;
507 }
508 }
509 }
510
511 if (dst >= dend) {
512 errno = ENAMETOOLONG;
513 goto err;
514 }
515 *dst = L'\0';
516 free(path);
517 return 0;
518
519 err:
520 free(path);
521 return -1;
522}
523
524
525/* Wide character version of canonicalizeWithPrefix. */
526
527int
528wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size)
529{
530 WIN32_FIND_DATAW fd;
531 HANDLE h;
532 WCHAR *src, *dst, *dend;
533 WCHAR *pathbuf;
534 int pathlen;
535
536 src = pathWithCanonicalPrefix;
537 dst = result; /* Place results here */
538 dend = dst + size; /* Don't go to or past here */
539
540
541 if ((pathlen=wcslen(pathWithCanonicalPrefix)) > MAX_PATH - 1) {
542 pathbuf = getPrefixed(pathWithCanonicalPrefix, pathlen);
543 h = FindFirstFileW(pathbuf, &fd); /* Look up prefix */
544 free(pathbuf);
545 } else
546 h = FindFirstFileW(pathWithCanonicalPrefix, &fd); /* Look up prefix */
547 if (h != INVALID_HANDLE_VALUE) {
548 /* Lookup succeeded; append true name to result and continue */
549 FindClose(h);
550 if (!(dst = wcp(dst, dend, L'\0',
551 canonicalPrefix,
552 canonicalPrefix + wcslen(canonicalPrefix)))) {
553 return -1;
554 }
555 if (!(dst = wcp(dst, dend, L'\\',
556 fd.cFileName,
557 fd.cFileName + wcslen(fd.cFileName)))) {
558 return -1;
559 }
560 } else {
561 if (!lastErrorReportable()) {
562 if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))) {
563 return -1;
564 }
565 } else {
566 return -1;
567 }
568 }
569
570 if (dst >= dend) {
571 errno = ENAMETOOLONG;
572 return -1;
573 }
574 *dst = L'\0';
575 return 0;
576}
577
578
579/* The appropriate location of getPrefixed() should be io_util_md.c, but
580 java.lang.instrument package has hardwired canonicalize_md.c into their
581 dll, to avoid complicate solution such as including io_util_md.c into
582 that package, as a workaround we put this method here.
583 */
584
585/* copy \\?\ or \\?\UNC\ to the front of path*/
586WCHAR*
587getPrefixed(const WCHAR* path, int pathlen) {
588 WCHAR* pathbuf = (WCHAR*)malloc((pathlen + 10) * sizeof (WCHAR));
589 if (pathbuf != 0) {
590 if (path[0] == L'\\' && path[1] == L'\\') {
591 if (path[2] == L'?' && path[3] == L'\\'){
592 /* if it already has a \\?\ don't do the prefix */
593 wcscpy(pathbuf, path );
594 } else {
595 /* only UNC pathname includes double slashes here */
596 wcscpy(pathbuf, L"\\\\?\\UNC\0");
597 wcscat(pathbuf, path + 1);
598 }
599 } else {
600 wcscpy(pathbuf, L"\\\\?\\\0");
601 wcscat(pathbuf, path );
602 }
603 }
604 return pathbuf;
605}