messing around with support for Windows path, cleanups, trying to identify

* include/libxml/xmlIO.h xmlIO.c parser.c HTMLparser.c DOCBparser.c:
  messing around with support for Windows path, cleanups,
  trying to identify and fix the various code path to the
  filename access. Added xmlNormalizeWindowsPath()
Daniel
diff --git a/xmlIO.c b/xmlIO.c
index 3cd4c2a..137ea03 100644
--- a/xmlIO.c
+++ b/xmlIO.c
@@ -123,6 +123,64 @@
 static int xmlOutputCallbackNr = 0;
 static int xmlOutputCallbackInitialized = 0;
 
+/************************************************************************
+ *									*
+ *		Handling of Windows file paths				*
+ *									*
+ ************************************************************************/
+
+#define IS_WINDOWS_PATH(p) 					\
+	((p != NULL) &&						\
+	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
+	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
+	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
+
+
+/**
+ * xmlNormalizeWindowsPath
+ * @path:  a windows path like "C:/foo/bar"
+ *
+ * Normalize a Windows path to make an URL from it
+ *
+ * Returns a new URI which must be freed by the caller or NULL
+ *   in case of error
+ */
+xmlChar *
+xmlNormalizeWindowsPath(const xmlChar *path)
+{
+    int len, i, j;
+    xmlChar *ret;
+
+    if (path == NULL)
+	return(NULL);
+    if (!IS_WINDOWS_PATH(path))
+	return(xmlStrdup(path));
+
+    len = xmlStrlen(path);
+    len += 10;
+    ret = xmlMalloc(len);
+    if (ret == NULL)
+	return(NULL);
+
+    ret[0] = 'f';
+    ret[1] = 'i';
+    ret[2] = 'l';
+    ret[3] = 'e';
+    ret[4] = ':';
+    ret[5] = '/';
+    ret[6] = '/';
+    ret[7] = '/';
+    for (i = 0,j = 8;i < len;i++,j++) {
+	/* TODO: UTF8 conversion + URI escaping ??? */
+	if (path[i] == '\\')
+	    ret[j] = '/';
+	else
+	    ret[j] = path[i];
+    }
+    ret[j] = 0;
+    return(ret);
+}
+
 /**
  * xmlCleanupInputCallbacks:
  *
@@ -296,7 +354,7 @@
 	return((void *) fd);
     }
 
-    if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost", 16))
+    if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost/", 17))
 #if defined (_WIN32) && !defined(__CYGWIN__)
 	path = &filename[17];
 #else
@@ -343,8 +401,12 @@
 	return((void *) fd);
     }
 
-    if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost", 16))
+    if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost/", 17))
+#if defined (_WIN32) && !defined(__CYGWIN__)
+	path = &filename[17];
+#else
 	path = &filename[16];
+#endif
     else if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file:///", 8)) {
 #if defined (_WIN32) && !defined(__CYGWIN__)
 	path = &filename[8];
@@ -460,8 +522,12 @@
 	return((void *) fd);
     }
 
-    if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost", 16))
+    if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost/", 17))
+#if defined (_WIN32) && !defined(__CYGWIN__)
+	path = &filename[17];
+#else
 	path = &filename[16];
+#endif
     else if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file:///", 8)) {
 #if defined (_WIN32) && !defined(__CYGWIN__)
 	path = &filename[8];
@@ -502,8 +568,12 @@
 	return((void *) fd);
     }
 
-    if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost", 16))
+    if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost/", 17))
+#if defined (_WIN32) && !defined(__CYGWIN__)
+	path = &filename[17];
+#else
 	path = &filename[16];
+#endif
     else if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file:///", 8)) {
 #if defined (_WIN32) && !defined(__CYGWIN__)
 	path = &filename[8];
@@ -1656,11 +1726,14 @@
     int i = 0;
     void *context = NULL;
     char *unescaped;
+    char *normalized;
 
     if (xmlInputCallbackInitialized == 0)
 	xmlRegisterDefaultInputCallbacks();
 
     if (URI == NULL) return(NULL);
+    normalized = (char *) xmlNormalizeWindowsPath((const xmlChar *)URI);
+    if (normalized == NULL) return(NULL);
 
 #ifdef LIBXML_CATALOG_ENABLED
 #endif
@@ -1670,7 +1743,7 @@
      * Go in reverse to give precedence to user defined handlers.
      * try with an unescaped version of the URI
      */
-    unescaped = xmlURIUnescapeString(URI, 0, NULL);
+    unescaped = xmlURIUnescapeString((char *) normalized, 0, NULL);
     if (unescaped != NULL) {
 	for (i = xmlInputCallbackNr - 1;i >= 0;i--) {
 	    if ((xmlInputCallbackTable[i].matchcallback != NULL) &&
@@ -1691,12 +1764,13 @@
 	for (i = xmlInputCallbackNr - 1;i >= 0;i--) {
 	    if ((xmlInputCallbackTable[i].matchcallback != NULL) &&
 		(xmlInputCallbackTable[i].matchcallback(URI) != 0)) {
-		context = xmlInputCallbackTable[i].opencallback(URI);
+		context = xmlInputCallbackTable[i].opencallback(normalized);
 		if (context != NULL)
 		    break;
 	    }
 	}
     }
+    xmlFree(normalized);
     if (context == NULL) {
 	return(NULL);
     }
@@ -1736,6 +1810,7 @@
     int i = 0;
     void *context = NULL;
     char *unescaped;
+    char *normalized;
 
     int is_http_uri = 0;	/*   Can't change if HTTP disabled  */
 
@@ -1743,11 +1818,13 @@
 	xmlRegisterDefaultOutputCallbacks();
 
     if (URI == NULL) return(NULL);
+    normalized = (char *) xmlNormalizeWindowsPath((const xmlChar *)URI);
+    if (normalized == NULL) return(NULL);
 
 #ifdef LIBXML_HTTP_ENABLED
     /*  Need to prevent HTTP URI's from falling into zlib short circuit  */
 
-    is_http_uri = xmlIOHTTPMatch( URI );
+    is_http_uri = xmlIOHTTPMatch( normalized );
 #endif
 
 
@@ -1756,7 +1833,7 @@
      * Go in reverse to give precedence to user defined handlers.
      * try with an unescaped version of the URI
      */
-    unescaped = xmlURIUnescapeString(URI, 0, NULL);
+    unescaped = xmlURIUnescapeString(normalized, 0, NULL);
     if (unescaped != NULL) {
 #ifdef HAVE_ZLIB_H
 	if ((compression > 0) && (compression <= 9) && (is_http_uri == 0)) {
@@ -1769,6 +1846,7 @@
 		    ret->closecallback = xmlGzfileClose;
 		}
 		xmlFree(unescaped);
+		xmlFree(normalized);
 		return(ret);
 	    }
 	}
@@ -1797,7 +1875,7 @@
     if (context == NULL) {
 #ifdef HAVE_ZLIB_H
 	if ((compression > 0) && (compression <= 9) && (is_http_uri == 0)) {
-	    context = xmlGzfileOpenW(URI, compression);
+	    context = xmlGzfileOpenW(normalized, compression);
 	    if (context != NULL) {
 		ret = xmlAllocOutputBuffer(encoder);
 		if (ret != NULL) {
@@ -1805,13 +1883,14 @@
 		    ret->writecallback = xmlGzfileWrite;
 		    ret->closecallback = xmlGzfileClose;
 		}
+		xmlFree(normalized);
 		return(ret);
 	    }
 	}
 #endif
 	for (i = xmlOutputCallbackNr - 1;i >= 0;i--) {
 	    if ((xmlOutputCallbackTable[i].matchcallback != NULL) &&
-		(xmlOutputCallbackTable[i].matchcallback(URI) != 0)) {
+		(xmlOutputCallbackTable[i].matchcallback(normalized) != 0)) {
 #if defined(LIBXML_HTTP_ENABLED) && defined(HAVE_ZLIB_H)
 		/*  Need to pass compression parameter into HTTP open calls  */
 		if (xmlOutputCallbackTable[i].matchcallback == xmlIOHTTPMatch)
@@ -1824,6 +1903,7 @@
 	    }
 	}
     }
+    xmlFree(normalized);
 
     if (context == NULL) {
 	return(NULL);
@@ -2450,8 +2530,12 @@
     if (URL == NULL)
 	return(0);
 
-    if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file://localhost", 16))
+    if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file://localhost/", 17))
+#if defined (_WIN32) && !defined(__CYGWIN__)
+	path = &URL[17];
+#else
 	path = &URL[16];
+#endif
     else if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file:///", 8)) {
 #if defined (_WIN32) && !defined(__CYGWIN__)
 	path = &URL[8];
@@ -2639,8 +2723,12 @@
     if (URL == NULL)
         return (0);
 
-    if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file://localhost", 16))
-        path = &URL[16];
+    if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file://localhost/", 17))
+#if defined (_WIN32) && !defined(__CYGWIN__)
+	path = &URL[17];
+#else
+	path = &URL[16];
+#endif
     else if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file:///", 8)) {
 #if defined (_WIN32) && !defined(__CYGWIN__)
         path = &URL[8];