Blame - jdk/src/windows/native/java/io/canonicalize_md.c - platform/libcore

blob: 4c81a80d62b4ff9f29654ac1669345789887260b [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 1998-2005 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	/*
				27	* Pathname canonicalization for Win32 file systems
				28	*/
				29
				30	#include <stdio.h>
				31	#include <stdlib.h>
				32	#include <string.h>
				33	#include <ctype.h>
				34	#include <assert.h>
				35	#include <sys/stat.h>
				36
				37	#include <windows.h>
				38	#include <winbase.h>
				39	#include <errno.h>
				40	#include "io_util_md.h"
				41
				42	#undef DEBUG_PATH /* Define this to debug path code */
				43
				44	#define isfilesep(c) ((c) == '/' \|\| (c) == '\\')
				45	#define wisfilesep(c) ((c) == L'/' \|\| (c) == L'\\')
				46	#define islb(c) (IsDBCSLeadByte((BYTE)(c)))
				47
				48
				49	/* Copy bytes to dst, not going past dend; return dst + number of bytes copied,
				50	or NULL if dend would have been exceeded. If first != '\0', copy that byte
				51	before copying bytes from src to send - 1. */
				52
				53	static char *
				54	cp(char dst, char dend, char first, char src, char send)
				55	{
				56	char p = src, q = dst;
				57	if (first != '\0') {
				58	if (q < dend) {
				59	*q++ = first;
				60	} else {
				61	errno = ENAMETOOLONG;
				62	return NULL;
				63	}
				64	}
				65	if (send - p > dend - q) {
				66	errno = ENAMETOOLONG;
				67	return NULL;
				68	}
				69	while (p < send) {
				70	q++ = p++;
				71	}
				72	return q;
				73	}
				74
				75	/* Wide character version of cp */
				76
				77	static WCHAR*
				78	wcp(WCHAR dst, WCHAR dend, WCHAR first, WCHAR src, WCHAR send)
				79	{
				80	WCHAR p = src, q = dst;
				81	if (first != L'\0') {
				82	if (q < dend) {
				83	*q++ = first;
				84	} else {
				85	errno = ENAMETOOLONG;
				86	return NULL;
				87	}
				88	}
				89	if (send - p > dend - q) {
				90	errno = ENAMETOOLONG;
				91	return NULL;
				92	}
				93	while (p < send)
				94	q++ = p++;
				95	return q;
				96	}
				97
				98
				99	/* Find first instance of '\\' at or following start. Return the address of
				100	that byte or the address of the null terminator if '\\' is not found. */
				101
				102	static char *
				103	nextsep(char *start)
				104	{
				105	char *p = start;
				106	int c;
				107	while ((c = *p) && (c != '\\')) {
				108	p += ((islb(c) && p[1]) ? 2 : 1);
				109	}
				110	return p;
				111	}
				112
				113	/* Wide character version of nextsep */
				114
				115	static WCHAR *
				116	wnextsep(WCHAR *start)
				117	{
				118	WCHAR *p = start;
				119	int c;
				120	while ((c = *p) && (c != L'\\'))
				121	p++;
				122	return p;
				123	}
				124
				125	/* Tell whether the given string contains any wildcard characters */
				126
				127	static int
				128	wild(char *start)
				129	{
				130	char *p = start;
				131	int c;
				132	while (c = *p) {
				133	if ((c == '*') \|\| (c == '?')) return 1;
				134	p += ((islb(c) && p[1]) ? 2 : 1);
				135	}
				136	return 0;
				137	}
				138
				139	/* Wide character version of wild */
				140
				141	static int
				142	wwild(WCHAR *start)
				143	{
				144	WCHAR *p = start;
				145	int c;
				146	while (c = *p) {
				147	if ((c == L'*') \|\| (c == L'?'))
				148	return 1;
				149	p++;
				150	}
				151	return 0;
				152	}
				153
				154	/* Tell whether the given string contains prohibited combinations of dots.
				155	In the canonicalized form no path element may have dots at its end.
				156	Allowed canonical paths: c:\xa...dksd\..ksa\.lk c:\...a\.b\cd..x.x
				157	Prohibited canonical paths: c:\..\x c:\x.\d c:\...
				158	*/
				159	static int
				160	dots(char *start)
				161	{
				162	char *p = start;
				163	while (*p) {
				164	if ((p = strchr(p, '.')) == NULL) // find next occurence of '.'
				165	return 0; // no more dots
				166	p++; // next char
				167	while ((*p) == '.') // go to the end of dots
				168	p++;
				169	if (p && (p != '\\')) // path element does not end with a dot
				170	p++; // go to the next char
				171	else
				172	return 1; // path element does end with a dot - prohibited
				173	}
				174	return 0; // no prohibited combinations of dots found
				175	}
				176
				177	/* Wide character version of dots */
				178	static int
				179	wdots(WCHAR *start)
				180	{
				181	WCHAR *p = start;
				182	while (*p) {
				183	if ((p = wcschr(p, L'.')) == NULL) // find next occurence of '.'
				184	return 0; // no more dots
				185	p++; // next char
				186	while ((*p) == L'.') // go to the end of dots
				187	p++;
				188	if (p && (p != L'\\')) // path element does not end with a dot
				189	p++; // go to the next char
				190	else
				191	return 1; // path element does end with a dot - prohibited
				192	}
				193	return 0; // no prohibited combinations of dots found
				194	}
				195
				196	/* If the lookup of a particular prefix fails because the file does not exist,
				197	because it is of the wrong type, because access is denied, or because the
				198	network is unreachable then canonicalization does not fail, it terminates
				199	successfully after copying the rest of the original path to the result path.
				200	Other I/O errors cause an error return.
				201	*/
				202
				203	int
				204	lastErrorReportable()
				205	{
				206	DWORD errval = GetLastError();
				207	if ((errval == ERROR_FILE_NOT_FOUND)
				208	\|\| (errval == ERROR_DIRECTORY)
				209	\|\| (errval == ERROR_PATH_NOT_FOUND)
				210	\|\| (errval == ERROR_BAD_NETPATH)
				211	\|\| (errval == ERROR_BAD_NET_NAME)
				212	\|\| (errval == ERROR_ACCESS_DENIED)
				213	\|\| (errval == ERROR_NETWORK_UNREACHABLE)
				214	\|\| (errval == ERROR_NETWORK_ACCESS_DENIED)) {
				215	return 0;
				216	}
				217
				218	#ifdef DEBUG_PATH
				219	jio_fprintf(stderr, "canonicalize: errval %d\n", errval);
				220	#endif
				221	return 1;
				222	}
				223
				224	/* Convert a pathname to canonical form. The input orig_path is assumed to
				225	have been converted to native form already, via JVM_NativePath(). This is
				226	necessary because _fullpath() rejects duplicate separator characters on
				227	Win95, though it accepts them on NT. */
				228
				229	int
				230	canonicalize(char orig_path, char result, int size)
				231	{
				232	WIN32_FIND_DATA fd;
				233	HANDLE h;
				234	char path[1024]; /* Working copy of path */
				235	char src, dst, *dend;
				236
				237	/* Reject paths that contain wildcards */
				238	if (wild(orig_path)) {
				239	errno = EINVAL;
				240	return -1;
				241	}
				242
				243	/* Collapse instances of "foo\.." and ensure absoluteness. Note that
				244	contrary to the documentation, the _fullpath procedure does not require
				245	the drive to be available. It also does not reliably change all
				246	occurrences of '/' to '\\' on Win95, so now JVM_NativePath does that. */
				247	if(!_fullpath(path, orig_path, sizeof(path))) {
				248	return -1;
				249	}
				250
				251	/* Correction for Win95: _fullpath may leave a trailing "\\"
				252	on a UNC pathname */
				253	if ((path[0] == '\\') && (path[1] == '\\')) {
				254	char *p = path + strlen(path);
				255	if ((p[-1] == '\\') && !islb(p[-2])) {
				256	p[-1] = '\0';
				257	}
				258	}
				259
				260	if (dots(path)) /* Check for prohibited combinations of dots */
				261	return -1;
				262
				263	src = path; /* Start scanning here */
				264	dst = result; /* Place results here */
				265	dend = dst + size; /* Don't go to or past here */
				266
				267	/* Copy prefix, assuming path is absolute */
				268	if (isalpha(src[0]) && (src[1] == ':') && (src[2] == '\\')) {
				269	/* Drive specifier */
				270	src = toupper(src); /* Canonicalize drive letter */
				271	if (!(dst = cp(dst, dend, '\0', src, src + 2))) {
				272	return -1;
				273	}
				274	src += 2;
				275	} else if ((src[0] == '\\') && (src[1] == '\\')) {
				276	/* UNC pathname */
				277	char *p;
				278	p = nextsep(src + 2); /* Skip past host name */
				279	if (!*p) {
				280	/* A UNC pathname must begin with "\\\\host\\share",
				281	so reject this path as invalid if there is no share name */
				282	errno = EINVAL;
				283	return -1;
				284	}
				285	p = nextsep(p + 1); /* Skip past share name */
				286	if (!(dst = cp(dst, dend, '\0', src, p))) {
				287	return -1;
				288	}
				289	src = p;
				290	} else {
				291	/* Invalid path */
				292	errno = EINVAL;
				293	return -1;
				294	}
				295
				296	/* Windows 95/98/Me bug - FindFirstFile fails on network mounted drives */
				297	/* for root pathes like "E:\" . If the path has this form, we should */
				298	/* simply return it, it is already canonicalized. */
				299	if (strlen(path) == 3 && path[1] == ':' && path[2] == '\\') {
				300	/* At this point we have already copied the drive specifier ("z:")*/
				301	/* so we need to copy "\" and the null character. */
				302	result[2] = '\\';
				303	result[3] = '\0';
				304	return 0;
				305	}
				306
				307	/* At this point we have copied either a drive specifier ("z:") or a UNC
				308	prefix ("\\\\host\\share") to the result buffer, and src points to the
				309	first byte of the remainder of the path. We now scan through the rest
				310	of the path, looking up each prefix in order to find the true name of
				311	the last element of each prefix, thereby computing the full true name of
				312	the original path. */
				313	while (*src) {
				314	char p = nextsep(src + 1); / Find next separator */
				315	char c = *p;
				316	assert(src == '\\'); / Invariant */
				317	p = '\0'; / Temporarily clear separator */
				318	h = FindFirstFile(path, &fd); /* Look up prefix */
				319	p = c; / Restore separator */
				320	if (h != INVALID_HANDLE_VALUE) {
				321	/* Lookup succeeded; append true name to result and continue */
				322	FindClose(h);
				323	if (!(dst = cp(dst, dend, '\\',
				324	fd.cFileName,
				325	fd.cFileName + strlen(fd.cFileName)))) {
				326	return -1;
				327	}
				328	src = p;
				329	continue;
				330	} else {
				331	if (!lastErrorReportable()) {
				332	if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
				333	return -1;
				334	}
				335	break;
				336	} else {
				337	return -1;
				338	}
				339	}
				340	}
				341
				342	if (dst >= dend) {
				343	errno = ENAMETOOLONG;
				344	return -1;
				345	}
				346	*dst = '\0';
				347	return 0;
				348
				349	}
				350
				351
				352	/* Convert a pathname to canonical form. The input prefix is assumed
				353	to be in canonical form already, and the trailing filename must not
				354	contain any wildcard, dot/double dot, or other "tricky" characters
				355	that are rejected by the canonicalize() routine above. This
				356	routine is present to allow the canonicalization prefix cache to be
				357	used while still returning canonical names with the correct
				358	capitalization. */
				359
				360	int
				361	canonicalizeWithPrefix(char* canonicalPrefix, char* pathWithCanonicalPrefix, char *result, int size)
				362	{
				363	WIN32_FIND_DATA fd;
				364	HANDLE h;
				365	char src, dst, *dend;
				366
				367	src = pathWithCanonicalPrefix;
				368	dst = result; /* Place results here */
				369	dend = dst + size; /* Don't go to or past here */
				370
				371	h = FindFirstFile(pathWithCanonicalPrefix, &fd); /* Look up file */
				372	if (h != INVALID_HANDLE_VALUE) {
				373	/* Lookup succeeded; concatenate true name to prefix */
				374	FindClose(h);
				375	if (!(dst = cp(dst, dend, '\0',
				376	canonicalPrefix,
				377	canonicalPrefix + strlen(canonicalPrefix)))) {
				378	return -1;
				379	}
				380	if (!(dst = cp(dst, dend, '\\',
				381	fd.cFileName,
				382	fd.cFileName + strlen(fd.cFileName)))) {
				383	return -1;
				384	}
				385	} else {
				386	if (!lastErrorReportable()) {
				387	if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
				388	return -1;
				389	}
				390	} else {
				391	return -1;
				392	}
				393	}
				394
				395	if (dst >= dend) {
				396	errno = ENAMETOOLONG;
				397	return -1;
				398	}
				399	*dst = '\0';
				400	return 0;
				401	}
				402
				403
				404	/* Wide character version of canonicalize. Size is a wide-character size. */
				405
				406	int
				407	wcanonicalize(WCHAR orig_path, WCHAR result, int size)
				408	{
				409	WIN32_FIND_DATAW fd;
				410	HANDLE h;
				411	WCHAR path; / Working copy of path */
				412	WCHAR src, dst, *dend, c;
				413
				414	/* Reject paths that contain wildcards */
				415	if (wwild(orig_path)) {
				416	errno = EINVAL;
				417	return -1;
				418	}
				419
				420	if ((path = (WCHAR)malloc(size sizeof(WCHAR))) == NULL)
				421	return -1;
				422
				423	/* Collapse instances of "foo\.." and ensure absoluteness. Note that
				424	contrary to the documentation, the _fullpath procedure does not require
				425	the drive to be available. */
				426	if(!_wfullpath(path, orig_path, size)) {
				427	goto err;
				428	}
				429
				430	if (wdots(path)) /* Check for prohibited combinations of dots */
				431	goto err;
				432
				433	src = path; /* Start scanning here */
				434	dst = result; /* Place results here */
				435	dend = dst + size; /* Don't go to or past here */
				436
				437	/* Copy prefix, assuming path is absolute */
				438	c = src[0];
				439	if (((c <= L'z' && c >= L'a') \|\| (c <= L'Z' && c >= L'A'))
				440	&& (src[1] == L':') && (src[2] == L'\\')) {
				441	/* Drive specifier */
				442	src = towupper(src); /* Canonicalize drive letter */
				443	if (!(dst = wcp(dst, dend, L'\0', src, src + 2))) {
				444	goto err;
				445	}
				446
				447	src += 2;
				448	} else if ((src[0] == L'\\') && (src[1] == L'\\')) {
				449	/* UNC pathname */
				450	WCHAR *p;
				451	p = wnextsep(src + 2); /* Skip past host name */
				452	if (!*p) {
				453	/* A UNC pathname must begin with "\\\\host\\share",
				454	so reject this path as invalid if there is no share name */
				455	errno = EINVAL;
				456	goto err;
				457	}
				458	p = wnextsep(p + 1); /* Skip past share name */
				459	if (!(dst = wcp(dst, dend, L'\0', src, p)))
				460	goto err;
				461	src = p;
				462	} else {
				463	/* Invalid path */
				464	errno = EINVAL;
				465	goto err;
				466	}
				467	/* At this point we have copied either a drive specifier ("z:") or a UNC
				468	prefix ("\\\\host\\share") to the result buffer, and src points to the
				469	first byte of the remainder of the path. We now scan through the rest
				470	of the path, looking up each prefix in order to find the true name of
				471	the last element of each prefix, thereby computing the full true name of
				472	the original path. */
				473	while (*src) {
				474	WCHAR p = wnextsep(src + 1); / Find next separator */
				475	WCHAR c = *p;
				476	WCHAR *pathbuf;
				477	int pathlen;
				478
				479	assert(src == L'\\'); / Invariant */
				480	p = L'\0'; / Temporarily clear separator */
				481
				482	if ((pathlen = wcslen(path)) > MAX_PATH - 1) {
				483	pathbuf = getPrefixed(path, pathlen);
				484	h = FindFirstFileW(pathbuf, &fd); /* Look up prefix */
				485	free(pathbuf);
				486	} else
				487	h = FindFirstFileW(path, &fd); /* Look up prefix */
				488
				489	p = c; / Restore separator */
				490	if (h != INVALID_HANDLE_VALUE) {
				491	/* Lookup succeeded; append true name to result and continue */
				492	FindClose(h);
				493	if (!(dst = wcp(dst, dend, L'\\', fd.cFileName,
				494	fd.cFileName + wcslen(fd.cFileName)))){
				495	goto err;
				496	}
				497	src = p;
				498	continue;
				499	} else {
				500	if (!lastErrorReportable()) {
				501	if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))){
				502	goto err;
				503	}
				504	break;
				505	} else {
				506	goto err;
				507	}
				508	}
				509	}
				510
				511	if (dst >= dend) {
				512	errno = ENAMETOOLONG;
				513	goto err;
				514	}
				515	*dst = L'\0';
				516	free(path);
				517	return 0;
				518
				519	err:
				520	free(path);
				521	return -1;
				522	}
				523
				524
				525	/* Wide character version of canonicalizeWithPrefix. */
				526
				527	int
				528	wcanonicalizeWithPrefix(WCHAR canonicalPrefix, WCHAR pathWithCanonicalPrefix, WCHAR *result, int size)
				529	{
				530	WIN32_FIND_DATAW fd;
				531	HANDLE h;
				532	WCHAR src, dst, *dend;
				533	WCHAR *pathbuf;
				534	int pathlen;
				535
				536	src = pathWithCanonicalPrefix;
				537	dst = result; /* Place results here */
				538	dend = dst + size; /* Don't go to or past here */
				539
				540
				541	if ((pathlen=wcslen(pathWithCanonicalPrefix)) > MAX_PATH - 1) {
				542	pathbuf = getPrefixed(pathWithCanonicalPrefix, pathlen);
				543	h = FindFirstFileW(pathbuf, &fd); /* Look up prefix */
				544	free(pathbuf);
				545	} else
				546	h = FindFirstFileW(pathWithCanonicalPrefix, &fd); /* Look up prefix */
				547	if (h != INVALID_HANDLE_VALUE) {
				548	/* Lookup succeeded; append true name to result and continue */
				549	FindClose(h);
				550	if (!(dst = wcp(dst, dend, L'\0',
				551	canonicalPrefix,
				552	canonicalPrefix + wcslen(canonicalPrefix)))) {
				553	return -1;
				554	}
				555	if (!(dst = wcp(dst, dend, L'\\',
				556	fd.cFileName,
				557	fd.cFileName + wcslen(fd.cFileName)))) {
				558	return -1;
				559	}
				560	} else {
				561	if (!lastErrorReportable()) {
				562	if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))) {
				563	return -1;
				564	}
				565	} else {
				566	return -1;
				567	}
				568	}
				569
				570	if (dst >= dend) {
				571	errno = ENAMETOOLONG;
				572	return -1;
				573	}
				574	*dst = L'\0';
				575	return 0;
				576	}
				577
				578
				579	/* The appropriate location of getPrefixed() should be io_util_md.c, but
				580	java.lang.instrument package has hardwired canonicalize_md.c into their
				581	dll, to avoid complicate solution such as including io_util_md.c into
				582	that package, as a workaround we put this method here.
				583	*/
				584
				585	/* copy \\?\ or \\?\UNC\ to the front of path*/
				586	WCHAR*
				587	getPrefixed(const WCHAR* path, int pathlen) {
				588	WCHAR* pathbuf = (WCHAR)malloc((pathlen + 10) sizeof (WCHAR));
				589	if (pathbuf != 0) {
				590	if (path[0] == L'\\' && path[1] == L'\\') {
				591	if (path[2] == L'?' && path[3] == L'\\'){
				592	/* if it already has a \\?\ don't do the prefix */
				593	wcscpy(pathbuf, path );
				594	} else {
				595	/* only UNC pathname includes double slashes here */
				596	wcscpy(pathbuf, L"\\\\?\\UNC\0");
				597	wcscat(pathbuf, path + 1);
				598	}
				599	} else {
				600	wcscpy(pathbuf, L"\\\\?\\\0");
				601	wcscat(pathbuf, path );
				602	}
				603	}
				604	return pathbuf;
				605	}