The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2007 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | package android.net; |
| 18 | |
| 19 | import java.util.ArrayList; |
| 20 | import java.util.HashMap; |
| 21 | import java.util.List; |
Elliott Hughes | cb64d43 | 2013-08-02 10:00:44 -0700 | [diff] [blame] | 22 | import java.util.Locale; |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 23 | import java.util.Set; |
| 24 | import java.util.StringTokenizer; |
| 25 | |
/**
 * Sanitizes the query portion of a URL. Simple example:
 * <pre>{@code
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.setAllowUnregisteredParamaters(true);
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe_User". (The string is first decoded, which
 * // converts the '+' to a ' '. Then the string is sanitized, which
 * // converts the ' ' to an '_'. The ' ' is converted because the default
 * // unregistered parameter sanitizer does not allow any special characters,
 * // and ' ' is a special character.)
 * }</pre>
 *
 * Register ValueSanitizers to customize the way individual
 * parameters are sanitized:
 * <pre>{@code
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe User", because the registered sanitizer
 * // treats ' ' as a legal character.
 * }</pre>
 *
 * There are several ways to create ValueSanitizers. In order of increasing
 * sophistication:
 * <ol>
 * <li>Call one of the UrlQuerySanitizer.getXXX() factory methods.
 * <li>Construct your own instance of
 * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
 * <li>Implement UrlQuerySanitizer.ValueSanitizer to define your own value
 * sanitizer.
 * </ol>
 */
public class UrlQuerySanitizer {

    /**
     * A simple tuple that holds parameter-value pairs.
     */
    public class ParameterValuePair {
        /**
         * The unencoded parameter
         */
        public String mParameter;
        /**
         * The unencoded value
         */
        public String mValue;

        /**
         * Construct a parameter-value tuple.
         * @param parameter an unencoded parameter
         * @param value an unencoded value
         */
        public ParameterValuePair(String parameter, String value) {
            mParameter = parameter;
            mValue = value;
        }
    }

    /** Sanitizers registered per (unencoded) parameter name. */
    private final HashMap<String, ValueSanitizer> mSanitizers =
            new HashMap<String, ValueSanitizer>();
    /** Preferred sanitized value for each parameter of the last parsed query. */
    private final HashMap<String, String> mEntries =
            new HashMap<String, String>();
    /** Every sanitized pair of the last parsed query, in query order. */
    private final ArrayList<ParameterValuePair> mEntriesList =
            new ArrayList<ParameterValuePair>();
    private boolean mAllowUnregisteredParamaters;
    private boolean mPreferFirstRepeatedParameter;
    private ValueSanitizer mUnregisteredParameterValueSanitizer =
            getAllIllegal();

    /**
     * A functor used to sanitize a single query value.
     */
    public static interface ValueSanitizer {
        /**
         * Sanitize an unencoded value.
         * @param value the unencoded value to sanitize
         * @return the sanitized unencoded value
         */
        public String sanitize(String value);
    }

    /**
     * Sanitize values based on which characters they contain. Illegal
     * characters are replaced with either space or '_', depending upon
     * whether space is a legal character or not.
     */
    public static class IllegalCharacterValueSanitizer implements
            ValueSanitizer {
        private final int mFlags;

        /**
         * Allow space (' ') characters.
         */
        public final static int SPACE_OK =              1 << 0;
        /**
         * Allow whitespace characters other than space. The
         * other whitespace characters are
         * '\t' '\f' '\n' '\r' and U+000B (vertical tab)
         */
        public final static int OTHER_WHITESPACE_OK =  1 << 1;
        /**
         * Allow characters with character codes 128 and above.
         */
        public final static int NON_7_BIT_ASCII_OK =    1 << 2;
        /**
         * Allow double quote characters. ('"')
         */
        public final static int DQUOTE_OK =             1 << 3;
        /**
         * Allow single quote characters. ('\'')
         */
        public final static int SQUOTE_OK =             1 << 4;
        /**
         * Allow less-than characters. ('&lt;')
         */
        public final static int LT_OK =                 1 << 5;
        /**
         * Allow greater-than characters. ('&gt;')
         */
        public final static int GT_OK =                 1 << 6;
        /**
         * Allow ampersand characters ('&amp;')
         */
        public final static int AMP_OK =                1 << 7;
        /**
         * Allow percent-sign characters ('%')
         */
        public final static int PCT_OK =                1 << 8;
        /**
         * Allow nul characters ('\0')
         */
        public final static int NUL_OK =                1 << 9;
        /**
         * Allow text to start with a script URL
         * such as "javascript:" or "vbscript:"
         */
        public final static int SCRIPT_URL_OK =         1 << 10;

        /**
         * Mask with all fields set to OK
         */
        public final static int ALL_OK =                0x7ff;

        /**
         * Mask with both regular space and other whitespace OK
         */
        public final static int ALL_WHITESPACE_OK =
            SPACE_OK | OTHER_WHITESPACE_OK;


        // Common flag combinations:

        /**
         * <ul>
         * <li>Deny all special characters.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int ALL_ILLEGAL =
            0;
        /**
         * <ul>
         * <li>Allow all special characters except Nul. ('\0').
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_NUL_LEGAL =
            ALL_OK & ~NUL_OK;
        /**
         * <ul>
         * <li>Allow all special characters except for:
         * <ul>
         *  <li>whitespace characters
         *  <li>Nul ('\0')
         * </ul>
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_WHITESPACE_LEGAL =
            ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_LEGAL =
            NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Allow spaces.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_AND_SPACE_LEGAL =
            URL_LEGAL | SPACE_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_LEGAL =
            AMP_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_AND_SPACE_LEGAL =
            AMP_OK | SPACE_OK;
        /**
         * <ul>
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int SPACE_LEGAL =
            SPACE_OK;
        /**
         * <ul>
         * <li>Allow all but.
         * <ul>
         *  <li>Nul ('\0')
         *  <li>Angle brackets ('&lt;', '&gt;')
         * </ul>
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
            ALL_OK & ~(NUL_OK | LT_OK | GT_OK);

        // Script URL definitions. Prefixes are lower case; candidates are
        // lower-cased with Locale.ROOT before they are compared.

        private final static String JAVASCRIPT_PREFIX = "javascript:";

        private final static String VBSCRIPT_PREFIX = "vbscript:";

        private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
                JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());

        /**
         * Construct a sanitizer. The parameters set the behavior of the
         * sanitizer.
         * @param flags some combination of the XXX_OK flags.
         */
        public IllegalCharacterValueSanitizer(int flags) {
            mFlags = flags;
        }

        /**
         * Sanitize a value.
         * <ol>
         * <li>If script URLs are not OK and the value (ignoring leading
         * white space) starts with a script URL prefix, the whole value is
         * replaced by the empty string.
         * <li>If neither spaces nor other white space is OK, then
         * white space will be trimmed from the beginning and end of
         * the value. (Just the actual white space characters are trimmed,
         * not other control codes.)
         * <li>Illegal characters will be replaced with
         * either ' ' or '_', depending on whether a space is itself a
         * legal character.
         * </ol>
         * @param value the unencoded value to sanitize, may be null
         * @return the sanitized value, or null if {@code value} is null
         */
        @Override
        public String sanitize(String value) {
            if (value == null) {
                return null;
            }
            // Script URLs are rejected when the SCRIPT_URL_OK bit is *absent*.
            if ((mFlags & SCRIPT_URL_OK) == 0 && startsWithScriptPrefix(value)) {
                return "";
            }

            // If whitespace isn't OK, get rid of whitespace at beginning
            // and end of value.
            if ((mFlags & ALL_WHITESPACE_OK) == 0) {
                value = trimWhitespace(value);
            }

            final int length = value.length();
            final char replacement = (mFlags & SPACE_OK) != 0 ? ' ' : '_';
            StringBuilder sanitized = new StringBuilder(length);
            for (int i = 0; i < length; i++) {
                char c = value.charAt(i);
                sanitized.append(characterIsLegal(c) ? c : replacement);
            }
            return sanitized.toString();
        }

        /**
         * Check whether a value begins with a script URL prefix. Surrounding
         * white space is ignored so that a value such as " javascript:..."
         * cannot slip past the check and then be trimmed into a script URL.
         * @param value the non-null value to test
         * @return true if the value starts with "javascript:" or "vbscript:",
         * compared case-insensitively
         */
        private static boolean startsWithScriptPrefix(String value) {
            String candidate = trimWhitespace(value);
            if (candidate.length() < MIN_SCRIPT_PREFIX_LENGTH) {
                return false;
            }
            String asLower = candidate.toLowerCase(Locale.ROOT);
            return asLower.startsWith(JAVASCRIPT_PREFIX)
                    || asLower.startsWith(VBSCRIPT_PREFIX);
        }

        /**
         * Trim whitespace from the beginning and end of a string.
         * <p>
         * Note: can't use {@link String#trim} because {@link String#trim} has a
         * different definition of whitespace than we want.
         * @param value the string to trim
         * @return the trimmed string
         */
        private static String trimWhitespace(String value) {
            int start = 0;
            int last = value.length() - 1;
            int end = last;
            while (start <= end && isWhitespace(value.charAt(start))) {
                start++;
            }
            while (end >= start && isWhitespace(value.charAt(end))) {
                end--;
            }
            if (start == 0 && end == last) {
                return value;
            }
            return value.substring(start, end + 1);
        }

        /**
         * Check if c is whitespace.
         * @param c character to test
         * @return true if c is a whitespace character
         */
        private static boolean isWhitespace(char c) {
            switch (c) {
                case ' ':
                case '\t':
                case '\f':
                case '\n':
                case '\r':
                case 11: /* VT */
                    return true;
                default:
                    return false;
            }
        }

        /**
         * Check whether an individual character is legal. Uses the
         * flag bit-set passed into the constructor.
         * @param c the character to test
         * @return true if c is a legal character
         */
        private boolean characterIsLegal(char c) {
            switch (c) {
                case ' ' : return (mFlags & SPACE_OK) != 0;
                case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
                  return (mFlags & OTHER_WHITESPACE_OK) != 0;
                case '\"': return (mFlags & DQUOTE_OK) != 0;
                case '\'': return (mFlags & SQUOTE_OK) != 0;
                case '<' : return (mFlags & LT_OK) != 0;
                case '>' : return (mFlags & GT_OK) != 0;
                case '&' : return (mFlags & AMP_OK) != 0;
                case '%' : return (mFlags & PCT_OK) != 0;
                case '\0': return (mFlags & NUL_OK) != 0;
                // Printable ASCII is always legal; remaining control codes
                // (including DEL, 127) never are; 128 and up depends on flags.
                default  : return (c >= 32 && c < 127) ||
                    ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
            }
        }
    }

    /**
     * Get the current value sanitizer used when processing
     * unregistered parameter values.
     * <p>
     * <b>Note:</b> The default unregistered parameter value sanitizer is
     * one that doesn't allow any special characters, similar to what
     * is returned by calling {@link #getAllIllegal()}.
     *
     * @return the current ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public ValueSanitizer getUnregisteredParameterValueSanitizer() {
        return mUnregisteredParameterValueSanitizer;
    }

    /**
     * Set the value sanitizer used when processing unregistered
     * parameter values.
     * @param sanitizer set the ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public void setUnregisteredParameterValueSanitizer(
            ValueSanitizer sanitizer) {
        mUnregisteredParameterValueSanitizer = sanitizer;
    }


    // Private fields for singleton sanitizers:

    private static final ValueSanitizer sAllIllegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_ILLEGAL);

    private static final ValueSanitizer sAllButNulLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);

    private static final ValueSanitizer sAllButWhitespaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);

    private static final ValueSanitizer sURLLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.URL_LEGAL);

    private static final ValueSanitizer sUrlAndSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);

    private static final ValueSanitizer sAmpLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.AMP_LEGAL);

    private static final ValueSanitizer sAmpAndSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);

    private static final ValueSanitizer sSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.SPACE_LEGAL);

    private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);

    /**
     * Return a value sanitizer that does not allow any special characters,
     * and also does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllIllegal() {
        return sAllIllegal;
    }

    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters. Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulLegal() {
        return sAllButNulLegal;
    }

    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters, space (' '), and other whitespace characters.
     * Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButWhitespaceLegal() {
        return sAllButWhitespaceLegal;
    }

    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs. Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlLegal() {
        return sURLLegal;
    }

    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs and allows spaces, which are not technically legal
     * in encoded URLs, but commonly appear anyway.
     * Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlAndSpaceLegal() {
        return sUrlAndSpaceLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&amp;'). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpLegal() {
        return sAmpLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&amp;') and space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpAndSpaceLegal() {
        return sAmpAndSpaceLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getSpaceLegal() {
        return sSpaceLegal;
    }

    /**
     * Return a value sanitizer that allows any special characters
     * except angle brackets ('&lt;' and '&gt;') and Nul ('\0').
     * Allows script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
        return sAllButNulAndAngleBracketsLegal;
    }

    /**
     * Constructs a UrlQuerySanitizer.
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters are not allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     */
    public UrlQuerySanitizer() {
    }

    /**
     * Constructs a UrlQuerySanitizer and parse a URL.
     * This constructor is provided for convenience when the
     * default parsing behavior is acceptable.
     * <p>
     * Because the URL is parsed before the constructor returns, there isn't
     * a chance to configure the sanitizer to change the parsing behavior.
     * <p>
     * <code>
     * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
     * String name = sanitizer.getValue("name");
     * </code>
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters <em>are</em> allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     * @param url the encoded URL to parse.
     */
    public UrlQuerySanitizer(String url) {
        setAllowUnregisteredParamaters(true);
        parseUrl(url);
    }

    /**
     * Parse the query parameters out of an encoded URL.
     * Works by extracting the query portion from the URL and then
     * calling parseQuery(). If there is no query portion it is
     * treated as if the query portion is an empty string.
     * <p>
     * The query portion is everything after the first '?'; nothing else
     * (such as a trailing fragment) is stripped from it.
     * @param url the encoded URL to parse.
     */
    public void parseUrl(String url) {
        int queryIndex = url.indexOf('?');
        String query = (queryIndex >= 0) ? url.substring(queryIndex + 1) : "";
        parseQuery(query);
    }

    /**
     * Parse a query. A query string is any number of parameter-value clauses
     * separated by any non-zero number of ampersands. A parameter-value clause
     * is a parameter followed by an equal sign, followed by a value. If the
     * equal sign is missing, the value is assumed to be the empty string.
     * <p>
     * Any entries from a previously parsed query are discarded first.
     * @param query the query to parse.
     */
    public void parseQuery(String query) {
        clear();
        // Split by '&'. StringTokenizer collapses runs of delimiters, so
        // empty clauses such as the one in "a=1&&b=2" never show up here.
        StringTokenizer tokenizer = new StringTokenizer(query, "&");
        while (tokenizer.hasMoreTokens()) {
            String attributeValuePair = tokenizer.nextToken();
            if (attributeValuePair.length() == 0) {
                continue;
            }
            int assignmentIndex = attributeValuePair.indexOf('=');
            if (assignmentIndex < 0) {
                // No assignment found, treat as if empty value
                parseEntry(attributeValuePair, "");
            } else {
                parseEntry(attributeValuePair.substring(0, assignmentIndex),
                        attributeValuePair.substring(assignmentIndex + 1));
            }
        }
    }

    /**
     * Get a set of all of the parameters found in the sanitized query.
     * <p>
     * Note: Do not modify this set. Treat it as a read-only set.
     * @return all the parameters found in the current query.
     */
    public Set<String> getParameterSet() {
        return mEntries.keySet();
    }

    /**
     * An array list of all of the parameter value pairs in the sanitized
     * query, in the order they appeared in the query. May contain duplicate
     * parameters.
     * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p>
     * @return all the parameter-value pairs found in the current query.
     */
    public List<ParameterValuePair> getParameterList() {
        return mEntriesList;
    }

    /**
     * Check if a parameter exists in the current sanitized query.
     * @param parameter the unencoded name of a parameter.
     * @return true if the parameter exists in the current sanitized query.
     */
    public boolean hasParameter(String parameter) {
        return mEntries.containsKey(parameter);
    }

    /**
     * Get the value for a parameter in the current sanitized query.
     * Returns null if the parameter does not exist.
     * @param parameter the unencoded name of a parameter.
     * @return the sanitized unencoded value of the parameter,
     * or null if the parameter does not exist.
     */
    public String getValue(String parameter) {
        return mEntries.get(parameter);
    }

    /**
     * Register a value sanitizer for a particular parameter. Can also be used
     * to replace or remove an already-set value sanitizer.
     * <p>
     * Registering a non-null value sanitizer for a particular parameter
     * makes that parameter a registered parameter.
     * @param parameter an unencoded parameter name
     * @param valueSanitizer the value sanitizer to use for a particular
     * parameter. May be null in order to unregister that parameter.
     * @see #getAllowUnregisteredParamaters()
     */
    public void registerParameter(String parameter,
            ValueSanitizer valueSanitizer) {
        setOrRemoveSanitizer(parameter, valueSanitizer);
    }

    /**
     * Register a value sanitizer for an array of parameters.
     * @param parameters An array of unencoded parameter names.
     * @param valueSanitizer the value sanitizer to use for each of the
     * parameters. May be null in order to unregister those parameters.
     * @see #registerParameter
     */
    public void registerParameters(String[] parameters,
            ValueSanitizer valueSanitizer) {
        for (String parameter : parameters) {
            setOrRemoveSanitizer(parameter, valueSanitizer);
        }
    }

    /**
     * Store a sanitizer for a parameter, or drop the parameter's entry when
     * the sanitizer is null so that no null-valued keys are left behind.
     */
    private void setOrRemoveSanitizer(String parameter,
            ValueSanitizer valueSanitizer) {
        if (valueSanitizer == null) {
            mSanitizers.remove(parameter);
        } else {
            mSanitizers.put(parameter, valueSanitizer);
        }
    }

    /**
     * Set whether or not unregistered parameters are allowed. If they
     * are not allowed, then they will be dropped when a query is sanitized.
     * <p>
     * Defaults to false.
     * @param allowUnregisteredParamaters true to allow unregistered parameters.
     * @see #getAllowUnregisteredParamaters()
     */
    public void setAllowUnregisteredParamaters(
            boolean allowUnregisteredParamaters) {
        mAllowUnregisteredParamaters = allowUnregisteredParamaters;
    }

    /**
     * Get whether or not unregistered parameters are allowed. If not
     * allowed, they will be dropped when a query is parsed.
     * @return true if unregistered parameters are allowed.
     * @see #setAllowUnregisteredParamaters(boolean)
     */
    public boolean getAllowUnregisteredParamaters() {
        return mAllowUnregisteredParamaters;
    }

    /**
     * Set whether or not the first occurrence of a repeated parameter is
     * preferred. True means the first repeated parameter is preferred.
     * False means that the last repeated parameter is preferred.
     * <p>
     * The preferred parameter is the one that is returned when
     * {@link #getValue(String)} is called.
     * <p>
     * defaults to false.
     * @param preferFirstRepeatedParameter True if the first repeated
     * parameter is preferred.
     * @see #getPreferFirstRepeatedParameter()
     */
    public void setPreferFirstRepeatedParameter(
            boolean preferFirstRepeatedParameter) {
        mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
    }

    /**
     * Get whether or not the first occurrence of a repeated parameter is
     * preferred.
     * @return true if the first occurrence of a repeated parameter is
     * preferred.
     * @see #setPreferFirstRepeatedParameter(boolean)
     */
    public boolean getPreferFirstRepeatedParameter() {
        return mPreferFirstRepeatedParameter;
    }

    /**
     * Parse an escaped parameter-value pair. The default implementation
     * unescapes both the parameter and the value, then looks up the
     * effective value sanitizer for the parameter and uses it to sanitize
     * the value. If all goes well then addSanitizedEntry is called with
     * the unescaped parameter and the sanitized unescaped value.
     * <p>
     * If there is no effective value sanitizer for the parameter, the
     * pair is silently dropped.
     * @param parameter an escaped parameter
     * @param value an unsanitized escaped value
     */
    protected void parseEntry(String parameter, String value) {
        String unescapedParameter = unescape(parameter);
        ValueSanitizer valueSanitizer =
            getEffectiveValueSanitizer(unescapedParameter);

        if (valueSanitizer == null) {
            return;
        }
        String unescapedValue = unescape(value);
        String sanitizedValue = valueSanitizer.sanitize(unescapedValue);
        addSanitizedEntry(unescapedParameter, sanitizedValue);
    }

    /**
     * Record a sanitized parameter-value pair. Override if you want to
     * do additional filtering or validation.
     * <p>
     * The pair is always appended to the parameter list; the per-parameter
     * value is only overwritten when the last occurrence is preferred.
     * @param parameter an unescaped parameter
     * @param value a sanitized unescaped value
     */
    protected void addSanitizedEntry(String parameter, String value) {
        mEntriesList.add(
                new ParameterValuePair(parameter, value));
        if (mPreferFirstRepeatedParameter && mEntries.containsKey(parameter)) {
            return;
        }
        mEntries.put(parameter, value);
    }

    /**
     * Get the value sanitizer for a parameter. Returns null if there
     * is no value sanitizer registered for the parameter.
     * @param parameter the unescaped parameter
     * @return the currently registered value sanitizer for this parameter.
     * @see #registerParameter(String, UrlQuerySanitizer.ValueSanitizer)
     */
    public ValueSanitizer getValueSanitizer(String parameter) {
        return mSanitizers.get(parameter);
    }

    /**
     * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
     * except if there is no value sanitizer registered for a parameter, and
     * unregistered parameters are allowed, then the default value sanitizer is
     * returned.
     * @param parameter an unescaped parameter
     * @return the effective value sanitizer for a parameter, or null if
     * the parameter is unregistered and unregistered parameters are not
     * allowed.
     */
    public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
        ValueSanitizer sanitizer = getValueSanitizer(parameter);
        if (sanitizer == null && mAllowUnregisteredParamaters) {
            sanitizer = getUnregisteredParameterValueSanitizer();
        }
        return sanitizer;
    }

    /**
     * Unescape an escaped string.
     * <ul>
     * <li>'+' characters are replaced by
     * ' ' characters.
     * <li>Valid "%xx" escape sequences are replaced by the
     * corresponding unescaped character. Each sequence is decoded on its
     * own into the character with that code (0 to 255).
     * <li>Invalid escape sequences such as "%1z", are passed through unchanged.
     * </ul>
     * @param string the escaped string
     * @return the unescaped string.
     */
    public String unescape(String string) {
        // Early exit if no escaped characters. Decoding has to start at
        // whichever of '%' or '+' comes first, otherwise a '+' that precedes
        // the first '%' would be copied through unconverted.
        final int firstPercent = string.indexOf('%');
        final int firstPlus = string.indexOf('+');
        final int firstEscape;
        if (firstPercent < 0) {
            firstEscape = firstPlus;
        } else if (firstPlus < 0) {
            firstEscape = firstPercent;
        } else {
            firstEscape = Math.min(firstPercent, firstPlus);
        }
        if (firstEscape < 0) {
            return string;
        }

        final int length = string.length();

        StringBuilder unescaped = new StringBuilder(length);
        unescaped.append(string, 0, firstEscape);
        for (int i = firstEscape; i < length; i++) {
            char c = string.charAt(i);
            if (c == '+') {
                c = ' ';
            } else if (c == '%' && i + 2 < length) {
                char c1 = string.charAt(i + 1);
                char c2 = string.charAt(i + 2);
                if (isHexDigit(c1) && isHexDigit(c2)) {
                    c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
                    // Skip the two hex digits that were just consumed.
                    i += 2;
                }
            }
            unescaped.append(c);
        }
        return unescaped.toString();
    }

    /**
     * Test if a character is a hexadecimal digit. Both upper case and lower
     * case hex digits are allowed.
     * @param c the character to test
     * @return true if c is a hex digit.
     */
    protected boolean isHexDigit(char c) {
        return decodeHexDigit(c) >= 0;
    }

    /**
     * Convert a character that represents a hexadecimal digit into an integer.
     * If the character is not a hexadecimal digit, then -1 is returned.
     * Both upper case and lower case hex digits are allowed.
     * @param c the hexadecimal digit.
     * @return the integer value of the hexadecimal digit.
     */
    protected int decodeHexDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        } else if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        } else if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        } else {
            return -1;
        }
    }

    /**
     * Clear the existing entries. Called to get ready to parse a new
     * query string.
     */
    protected void clear() {
        mEntries.clear();
        mEntriesList.clear();
    }
}
| 914 | |