/*
 * Copyright 1996 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
| 25 | |
| 26 | package sun.net; |
| 27 | |
/**
 * Helper class to map URL "abbreviations" to real URLs.
 * The default implementation applies the first matching rule below:
 * <pre>{@code
 *   (empty string)        => returned unchanged
 *   ftp.mumble.bar/...    => ftp://ftp.mumble.bar/...
 *   gopher.mumble.bar/... => gopher://gopher.mumble.bar/...
 *   /foo/...              => file:/foo/...
 *   proto:...             => returned unchanged (already has a protocol)
 *   name                  => http://www.name.com
 *   other.name.dom/...    => http://other.name.dom/...
 * }</pre>
 *
 * Full URLs (those including a protocol name) are passed through unchanged.
 *
 * Subclassers can override or extend this behavior to support different
 * or additional canonicalization policies.
 *
 * @author Steve Byrne
 */
public class URLCanonicalizer {
    /**
     * Creates the default canonicalizer instance.
     */
    public URLCanonicalizer() { }

    /**
     * Given a possibly abbreviated URL (missing a protocol name, typically),
     * this method's job is to transform that URL into a canonical form,
     * by including a protocol name and additional syntax, if necessary.
     *
     * For a correctly formed URL, this method should just return its argument.
     * The prefix tests are case-sensitive.
     *
     * @param simpleURL the possibly abbreviated URL; must not be null
     * @return the canonical form of {@code simpleURL}; an empty string is
     *         returned unchanged
     * @throws NullPointerException if {@code simpleURL} is null
     */
    public String canonicalize(String simpleURL) {
        // There is nothing to expand in an empty string.  Without this guard
        // it would be wrapped as a "simple host name" and come out as the
        // malformed "http://www..com".
        if (simpleURL.length() == 0) {
            return simpleURL;
        }

        if (simpleURL.startsWith("ftp.")) {
            return "ftp://" + simpleURL;
        }
        if (simpleURL.startsWith("gopher.")) {
            return "gopher://" + simpleURL;
        }
        if (simpleURL.startsWith("/")) {
            return "file:" + simpleURL;
        }
        if (hasProtocolName(simpleURL)) {
            // Already looks like a full URL: pass it through untouched.
            return simpleURL;
        }

        // No protocol: assume http.  A bare single-label name such as "java"
        // is additionally expanded to "www.java.com".
        String host = simpleURL;
        if (isSimpleHostName(simpleURL)) {
            host = "www." + simpleURL + ".com";
        }
        return "http://" + host;
    }

    /**
     * Given a possibly abbreviated URL, this predicate function returns
     * true if it appears that the URL contains a protocol name, i.e. if
     * the text before the first ':' is non-empty and consists only of
     * ASCII letters and '-'.
     *
     * @param url the URL text to inspect; must not be null
     * @return true if {@code url} appears to start with a protocol name
     * @throws NullPointerException if {@code url} is null
     */
    public boolean hasProtocolName(String url) {
        int index = url.indexOf(':');
        if (index <= 0) {       // treat ":foo" as not having a protocol spec
            return false;
        }

        // REMIND: this is a guess at legal characters in a protocol.
        // RFC 3986 also permits digits, '+' and '.' after the first letter,
        // but accepting those here would make inputs like "host.dom:8080"
        // look as if they already carried a protocol, so the set is left
        // as it was.
        for (int i = 0; i < index; i++) {
            char c = url.charAt(i);
            if (!isAsciiLetter(c) && c != '-') {
                // found an illegal character
                return false;
            }
        }

        return true;
    }

    /**
     * Returns true if the URL is just a single name: a non-empty string
     * made up solely of ASCII letters, digits and '-'.  Anything else
     * (periods, slashes, colons, ...) yields false, as does the empty
     * string.
     *
     * @param url the URL text to inspect; must not be null
     * @return true if {@code url} is a single host-name label
     * @throws NullPointerException if {@code url} is null
     */
    protected boolean isSimpleHostName(String url) {
        int length = url.length();
        if (length == 0) {
            // An empty string is not a name; do not let it be expanded
            // into "www..com".
            return false;
        }

        // REMIND: this is a guess at legal characters in a host name label --
        // need to be verified
        for (int i = 0; i < length; i++) {
            char c = url.charAt(i);
            boolean legal = isAsciiLetter(c)
                || (c >= '0' && c <= '9')
                || (c == '-');
            if (!legal) {
                // found an illegal character
                return false;
            }
        }

        return true;
    }

    /** Returns true if {@code c} is one of 'A'-'Z' or 'a'-'z'. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }
}