blob: 14d56ed2ddd2b1a739c94c2d1e217de9cc1b41f6 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1996 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26package sun.net;
27
28/**
29 * Helper class to map URL "abbreviations" to real URLs.
30 * The default implementation supports the following mappings:
31 * ftp.mumble.bar/... => ftp://ftp.mumble.bar/...
32 * gopher.mumble.bar/... => gopher://gopher.mumble.bar/...
33 * other.name.dom/... => http://other.name.dom/...
34 * /foo/... => file:/foo/...
35 *
36 * Full URLs (those including a protocol name) are passed through unchanged.
37 *
38 * Subclassers can override or extend this behavior to support different
39 * or additional canonicalization policies.
40 *
41 * @author Steve Byrne
42 */
43
public class URLCanonicalizer {
    /**
     * Creates the default canonicalizer instance.
     */
    public URLCanonicalizer() { }

    /**
     * Transforms a possibly abbreviated URL (typically one that is missing
     * its protocol name) into a canonical form by adding a protocol name
     * and any additional syntax that is required.
     *
     * The rules are tried in this order, and the first match wins:
     * <ul>
     * <li>starts with "ftp."    : "ftp://" is prepended</li>
     * <li>starts with "gopher." : "gopher://" is prepended</li>
     * <li>starts with "/"       : "file:" is prepended</li>
     * <li>{@link #hasProtocolName} is true : returned unchanged</li>
     * <li>{@link #isSimpleHostName} is true : wrapped as
     *     "http://www.<i>name</i>.com"</li>
     * <li>anything else         : "http://" is prepended</li>
     * </ul>
     *
     * For a correctly formed URL, this method just returns its argument.
     *
     * @param simpleURL the possibly abbreviated URL; must not be null
     * @return the canonical form of <code>simpleURL</code>
     * @throws NullPointerException if <code>simpleURL</code> is null
     */
    public String canonicalize(String simpleURL) {
        if (simpleURL.startsWith("ftp.")) {
            return "ftp://" + simpleURL;
        }
        if (simpleURL.startsWith("gopher.")) {
            return "gopher://" + simpleURL;
        }
        if (simpleURL.startsWith("/")) {
            return "file:" + simpleURL;
        }
        if (hasProtocolName(simpleURL)) {
            // Already a full URL: pass it through untouched.
            return simpleURL;
        }

        // A bare word such as "example" is expanded to "www.example.com";
        // anything else is used as the host/path part as given.
        String hostAndPath = simpleURL;
        if (isSimpleHostName(simpleURL)) {
            hostAndPath = "www." + simpleURL + ".com";
        }
        return "http://" + hostAndPath;
    }

    /**
     * Given a possibly abbreviated URL, this predicate returns true if it
     * appears that the URL contains a protocol name, i.e. the text before
     * the first ':' is non-empty and consists only of ASCII letters
     * and '-'.
     *
     * Note that this is a heuristic: "localhost:8080" is reported as having
     * a protocol name because "localhost" consists only of letters, while
     * "example.org:8080" is not because of the '.'.
     *
     * @param url the URL text to inspect; must not be null
     * @return true if <code>url</code> appears to start with a protocol name
     */
    public boolean hasProtocolName(String url) {
        int colon = url.indexOf(':');
        if (colon <= 0) { // treat ":foo" as not having a protocol spec
            return false;
        }

        // REMIND: this is a guess at legal characters in a protocol --
        // needs to be verified. It is deliberately narrower than the
        // generic URI scheme syntax (which also allows digits, '+' and
        // '.'): accepting digits or '.' here would make "host.dom:80/..."
        // style abbreviations look like they already have a protocol.
        for (int i = 0; i < colon; i++) {
            char c = url.charAt(i);
            if (!isAsciiLetter(c) && c != '-') {
                // found an illegal character
                return false;
            }
        }

        return true;
    }

    /**
     * Returns true if the URL is just a single name, i.e. it is non-empty
     * and made up only of ASCII letters, digits and '-' (no periods,
     * slashes or colons); false otherwise.
     *
     * @param url the URL text to inspect; must not be null
     * @return true if <code>url</code> is a single non-empty host label
     */
    protected boolean isSimpleHostName(String url) {
        // An empty string is not a name. Without this check the loop below
        // is skipped entirely and canonicalize("") would produce the
        // meaningless "http://www..com".
        if (url.length() == 0) {
            return false;
        }

        // REMIND: this is a guess at legal characters in a host name --
        // needs to be verified
        for (int i = 0; i < url.length(); i++) {
            char c = url.charAt(i);
            boolean isDigit = (c >= '0' && c <= '9');
            if (!isAsciiLetter(c) && !isDigit && c != '-') {
                // found an illegal character
                return false;
            }
        }

        return true;
    }

    /** Returns true if <code>c</code> is one of A-Z or a-z. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }
}