blob: d2073b4dfd3ad4c6859e5164af3fcfebbc6ed27d [file] [log] [blame]
/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package android.net;
18
19import java.util.ArrayList;
20import java.util.HashMap;
21import java.util.List;
Elliott Hughescb64d432013-08-02 10:00:44 -070022import java.util.Locale;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080023import java.util.Set;
24import java.util.StringTokenizer;
25
/**
 * Sanitizes the query portion of a URL. Simple example:
 * <pre>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.setAllowUnregisteredParamaters(true);
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe_User". (The string is first decoded, which
 * // converts the '+' to a ' '. Then the string is sanitized, which
 * // converts the ' ' to an '_'. The ' ' is converted because the default
 * // unregistered parameter sanitizer does not allow any special characters,
 * // and ' ' is a special character.)
 * </pre>
 *
 * Register ValueSanitizers to customize the way individual
 * parameters are sanitized:
 * <pre>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe User". (The '+' is decoded to a ' ', and the
 * // space-legal sanitizer registered for "name" leaves the ' ' alone.)
 * </pre>
 *
 * There are several ways to create ValueSanitizers. In order of increasing
 * sophistication:
 * <ol>
 * <li>Call one of the UrlQuerySanitizer.getXXX() methods.
 * <li>Construct your own instance of
 * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
 * <li>Implement UrlQuerySanitizer.ValueSanitizer to define your own value
 * sanitizer.
 * </ol>
 */
public class UrlQuerySanitizer {

    /**
     * A simple tuple that holds parameter-value pairs.
     */
    public class ParameterValuePair {
        /**
         * Construct a parameter-value tuple.
         * @param parameter an unencoded parameter
         * @param value an unencoded value
         */
        public ParameterValuePair(String parameter, String value) {
            mParameter = parameter;
            mValue = value;
        }

        /**
         * The unencoded parameter
         */
        public String mParameter;

        /**
         * The unencoded value
         */
        public String mValue;
    }

    /** Sanitizers registered per (unencoded) parameter name. */
    private final HashMap<String, ValueSanitizer> mSanitizers =
            new HashMap<String, ValueSanitizer>();
    /** Preferred sanitized value per parameter name for the last parsed query. */
    private final HashMap<String, String> mEntries =
            new HashMap<String, String>();
    /** Every accepted parameter-value pair of the last parsed query, in order. */
    private final ArrayList<ParameterValuePair> mEntriesList =
            new ArrayList<ParameterValuePair>();
    private boolean mAllowUnregisteredParamaters;
    private boolean mPreferFirstRepeatedParameter;
    private ValueSanitizer mUnregisteredParameterValueSanitizer =
            getAllIllegal();

    /**
     * A functor used to sanitize a single query value.
     */
    public static interface ValueSanitizer {
        /**
         * Sanitize an unencoded value.
         * @param value the unencoded value to sanitize
         * @return the sanitized unencoded value
         */
        public String sanitize(String value);
    }

    /**
     * Sanitize values based on which characters they contain. Illegal
     * characters are replaced with either space or '_', depending upon
     * whether space is a legal character or not.
     */
    public static class IllegalCharacterValueSanitizer implements
            ValueSanitizer {
        private int mFlags;

        /**
         * Allow space (' ') characters.
         */
        public final static int SPACE_OK = 1 << 0;
        /**
         * Allow whitespace characters other than space. The
         * other whitespace characters are
         * '\t' '\f' '\n' '\r' and vertical tab (U+000B).
         */
        public final static int OTHER_WHITESPACE_OK = 1 << 1;
        /**
         * Allow characters with character codes of 128 and above.
         */
        public final static int NON_7_BIT_ASCII_OK = 1 << 2;
        /**
         * Allow double quote characters. ('"')
         */
        public final static int DQUOTE_OK = 1 << 3;
        /**
         * Allow single quote characters. ('\'')
         */
        public final static int SQUOTE_OK = 1 << 4;
        /**
         * Allow less-than characters. ('<')
         */
        public final static int LT_OK = 1 << 5;
        /**
         * Allow greater-than characters. ('>')
         */
        public final static int GT_OK = 1 << 6;
        /**
         * Allow ampersand characters ('&')
         */
        public final static int AMP_OK = 1 << 7;
        /**
         * Allow percent-sign characters ('%')
         */
        public final static int PCT_OK = 1 << 8;
        /**
         * Allow nul characters ('\0')
         */
        public final static int NUL_OK = 1 << 9;
        /**
         * Allow text to start with a script URL
         * such as "javascript:" or "vbscript:"
         */
        public final static int SCRIPT_URL_OK = 1 << 10;

        /**
         * Mask with all fields set to OK
         */
        public final static int ALL_OK = 0x7ff;

        /**
         * Mask with both regular space and other whitespace OK
         */
        public final static int ALL_WHITESPACE_OK =
                SPACE_OK | OTHER_WHITESPACE_OK;


        // Common flag combinations:

        /**
         * <ul>
         * <li>Deny all special characters.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int ALL_ILLEGAL =
                0;
        /**
         * <ul>
         * <li>Allow all special characters except Nul. ('\0').
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_NUL_LEGAL =
                ALL_OK & ~NUL_OK;
        /**
         * <ul>
         * <li>Allow all special characters except for:
         * <ul>
         *  <li>whitespace characters
         *  <li>Nul ('\0')
         * </ul>
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_WHITESPACE_LEGAL =
                ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_LEGAL =
                NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Allow spaces.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_AND_SPACE_LEGAL =
                URL_LEGAL | SPACE_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_LEGAL =
                AMP_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_AND_SPACE_LEGAL =
                AMP_OK | SPACE_OK;
        /**
         * <ul>
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int SPACE_LEGAL =
                SPACE_OK;
        /**
         * <ul>
         * <li>Allow all but.
         * <ul>
         *  <li>Nul ('\0')
         *  <li>Angle brackets ('<', '>')
         * </ul>
         * <li>Allow script URLs (the mask keeps SCRIPT_URL_OK set).
         * </ul>
         */
        public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
                ALL_OK & ~(NUL_OK | LT_OK | GT_OK);

        // Script URL definitions. Prefixes are compared against the
        // lower-cased value, so they must be lower case themselves.

        private final static String JAVASCRIPT_PREFIX = "javascript:";

        private final static String VBSCRIPT_PREFIX = "vbscript:";

        private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
                JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());

        /**
         * Construct a sanitizer. The parameters set the behavior of the
         * sanitizer.
         * @param flags some combination of the XXX_OK flags.
         */
        public IllegalCharacterValueSanitizer(
                int flags) {
            mFlags = flags;
        }

        /**
         * Sanitize a value.
         * <ol>
         * <li>If script URLs are not OK and the value starts with a script
         * URL prefix (compared case-insensitively), the whole value is
         * replaced by the empty string.
         * <li>If neither spaces nor other white space is OK, then
         * white space will be trimmed from the beginning and end of
         * the URL. (Just the actual white space characters are trimmed, not
         * other control codes.)
         * <li> Illegal characters will be replaced with
         * either ' ' or '_', depending on whether a space is itself a
         * legal character.
         * </ol>
         * @param value the unencoded value; may be null
         * @return the sanitized value, or null if {@code value} is null
         */
        @Override
        public String sanitize(String value) {
            if (value == null) {
                return null;
            }
            int length = value.length();
            // Reject script URLs unless the caller explicitly allowed them.
            // (The flag test was previously inverted, which stripped script
            // URLs only when they were allowed.)
            if ((mFlags & SCRIPT_URL_OK) == 0) {
                // Cheap length guard: skip lower-casing values that are too
                // short to contain any script prefix.
                if (length >= MIN_SCRIPT_PREFIX_LENGTH) {
                    String asLower = value.toLowerCase(Locale.ROOT);
                    if (asLower.startsWith(JAVASCRIPT_PREFIX) ||
                            asLower.startsWith(VBSCRIPT_PREFIX)) {
                        return "";
                    }
                }
            }

            // If whitespace isn't OK, get rid of whitespace at beginning
            // and end of value.
            if ((mFlags & ALL_WHITESPACE_OK) == 0) {
                value = trimWhitespace(value);
                // The length could have changed, so we need to correct
                // the length variable.
                length = value.length();
            }

            // Illegal characters become ' ' when space is legal, else '_'.
            final char replacement = ((mFlags & SPACE_OK) != 0) ? ' ' : '_';
            StringBuilder stringBuilder = new StringBuilder(length);
            for (int i = 0; i < length; i++) {
                char c = value.charAt(i);
                stringBuilder.append(characterIsLegal(c) ? c : replacement);
            }
            return stringBuilder.toString();
        }

        /**
         * Trim whitespace from the beginning and end of a string.
         * <p>
         * Note: can't use {@link String#trim} because {@link String#trim} has a
         * different definition of whitespace than we want.
         * @param value the string to trim
         * @return the trimmed string
         */
        private String trimWhitespace(String value) {
            int start = 0;
            int last = value.length() - 1;
            int end = last;
            while (start <= end && isWhitespace(value.charAt(start))) {
                start++;
            }
            while (end >= start && isWhitespace(value.charAt(end))) {
                end--;
            }
            // Nothing trimmed: hand back the same instance.
            if (start == 0 && end == last) {
                return value;
            }
            return value.substring(start, end + 1);
        }

        /**
         * Check if c is whitespace.
         * @param c character to test
         * @return true if c is a whitespace character
         */
        private boolean isWhitespace(char c) {
            switch (c) {
                case ' ':
                case '\t':
                case '\f':
                case '\n':
                case '\r':
                case 11: /* VT */
                    return true;
                default:
                    return false;
            }
        }

        /**
         * Check whether an individual character is legal. Uses the
         * flag bit-set passed into the constructor. Characters without a
         * dedicated flag are legal when they are printable 7-bit ASCII
         * (32 to 126), or when they are 128 or above and
         * NON_7_BIT_ASCII_OK is set. Remaining control characters and
         * DEL (127) are never legal.
         * @param c the character to test
         * @return true if c is a legal character
         */
        private boolean characterIsLegal(char c) {
            switch (c) {
                case ' ' : return (mFlags & SPACE_OK) != 0;
                case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
                    return (mFlags & OTHER_WHITESPACE_OK) != 0;
                case '\"': return (mFlags & DQUOTE_OK) != 0;
                case '\'': return (mFlags & SQUOTE_OK) != 0;
                case '<' : return (mFlags & LT_OK) != 0;
                case '>' : return (mFlags & GT_OK) != 0;
                case '&' : return (mFlags & AMP_OK) != 0;
                case '%' : return (mFlags & PCT_OK) != 0;
                case '\0': return (mFlags & NUL_OK) != 0;
                default  : return (c >= 32 && c < 127) ||
                        ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
            }
        }
    }

    /**
     * Get the current value sanitizer used when processing
     * unregistered parameter values.
     * <p>
     * <b>Note:</b> The default unregistered parameter value sanitizer is
     * one that doesn't allow any special characters, similar to what
     * is returned by calling getAllIllegal.
     *
     * @return the current ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public ValueSanitizer getUnregisteredParameterValueSanitizer() {
        return mUnregisteredParameterValueSanitizer;
    }

    /**
     * Set the value sanitizer used when processing unregistered
     * parameter values.
     * @param sanitizer set the ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public void setUnregisteredParameterValueSanitizer(
            ValueSanitizer sanitizer) {
        mUnregisteredParameterValueSanitizer = sanitizer;
    }


    // Private fields for singleton sanitizers:

    private static final ValueSanitizer sAllIllegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.ALL_ILLEGAL);

    private static final ValueSanitizer sAllButNulLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);

    private static final ValueSanitizer sAllButWhitespaceLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);

    private static final ValueSanitizer sURLLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.URL_LEGAL);

    private static final ValueSanitizer sUrlAndSpaceLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);

    private static final ValueSanitizer sAmpLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.AMP_LEGAL);

    private static final ValueSanitizer sAmpAndSpaceLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);

    private static final ValueSanitizer sSpaceLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.SPACE_LEGAL);

    private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);

    /**
     * Return a value sanitizer that does not allow any special characters,
     * and also does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllIllegal() {
        return sAllIllegal;
    }

    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters. Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulLegal() {
        return sAllButNulLegal;
    }

    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters, space (' '), and other whitespace characters.
     * Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButWhitespaceLegal() {
        return sAllButWhitespaceLegal;
    }

    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs. Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlLegal() {
        return sURLLegal;
    }

    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs and allows spaces, which are not technically legal
     * in encoded URLs, but commonly appear anyway.
     * Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlAndSpaceLegal() {
        return sUrlAndSpaceLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&'). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpLegal() {
        return sAmpLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&') and space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpAndSpaceLegal() {
        return sAmpAndSpaceLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getSpaceLegal() {
        return sSpaceLegal;
    }

    /**
     * Return a value sanitizer that allows any special characters
     * except angle brackets ('<' and '>') and Nul ('\0').
     * Allows script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
        return sAllButNulAndAngleBracketsLegal;
    }

    /**
     * Constructs a UrlQuerySanitizer.
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters are not allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     */
    public UrlQuerySanitizer() {
    }

    /**
     * Constructs a UrlQuerySanitizer and parse a URL.
     * This constructor is provided for convenience when the
     * default parsing behavior is acceptable.
     * <p>
     * Because the URL is parsed before the constructor returns, there isn't
     * a chance to configure the sanitizer to change the parsing behavior.
     * <p>
     * <code>
     * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
     * String name = sanitizer.getValue("name");
     * </code>
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters <em>are</em> allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     * @param url the encoded URL to parse.
     */
    public UrlQuerySanitizer(String url) {
        setAllowUnregisteredParamaters(true);
        parseUrl(url);
    }

    /**
     * Parse the query parameters out of an encoded URL.
     * Works by extracting the query portion from the URL and then
     * calling parseQuery(). If there is no query portion it is
     * treated as if the query portion is an empty string.
     * @param url the encoded URL to parse.
     */
    public void parseUrl(String url) {
        // Everything after the first '?' is handed to parseQuery().
        int queryIndex = url.indexOf('?');
        String query = (queryIndex >= 0) ? url.substring(queryIndex + 1) : "";
        parseQuery(query);
    }

    /**
     * Parse a query. A query string is any number of parameter-value clauses
     * separated by any non-zero number of ampersands. A parameter-value clause
     * is a parameter followed by an equal sign, followed by a value. If the
     * equal sign is missing, the value is assumed to be the empty string.
     * Any entries from a previously parsed query are discarded first.
     * @param query the query to parse.
     */
    public void parseQuery(String query) {
        clear();
        // Split by '&'
        StringTokenizer tokenizer = new StringTokenizer(query, "&");
        while (tokenizer.hasMoreElements()) {
            String attributeValuePair = tokenizer.nextToken();
            if (attributeValuePair.length() > 0) {
                int assignmentIndex = attributeValuePair.indexOf('=');
                if (assignmentIndex < 0) {
                    // No assignment found, treat as if empty value
                    parseEntry(attributeValuePair, "");
                } else {
                    parseEntry(attributeValuePair.substring(0, assignmentIndex),
                            attributeValuePair.substring(assignmentIndex + 1));
                }
            }
        }
    }

    /**
     * Get a set of all of the parameters found in the sanitized query.
     * <p>
     * Note: Do not modify this set. Treat it as a read-only set.
     * @return all the parameters found in the current query.
     */
    public Set<String> getParameterSet() {
        return mEntries.keySet();
    }

    /**
     * An array list of all of the parameter value pairs in the sanitized
     * query, in the order they appeared in the query. May contain duplicate
     * parameters.
     * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p>
     * @return the parameter-value pairs of the current query.
     */
    public List<ParameterValuePair> getParameterList() {
        return mEntriesList;
    }

    /**
     * Check if a parameter exists in the current sanitized query.
     * @param parameter the unencoded name of a parameter.
     * @return true if the parameter exists in the current sanitized query.
     */
    public boolean hasParameter(String parameter) {
        return mEntries.containsKey(parameter);
    }

    /**
     * Get the value for a parameter in the current sanitized query.
     * Returns null if the parameter does not exist.
     * @param parameter the unencoded name of a parameter.
     * @return the sanitized unencoded value of the parameter,
     * or null if the parameter does not exist.
     */
    public String getValue(String parameter) {
        return mEntries.get(parameter);
    }

    /**
     * Register a value sanitizer for a particular parameter. Can also be used
     * to replace or remove an already-set value sanitizer.
     * <p>
     * Registering a non-null value sanitizer for a particular parameter
     * makes that parameter a registered parameter.
     * @param parameter an unencoded parameter name
     * @param valueSanitizer the value sanitizer to use for a particular
     * parameter. May be null in order to unregister that parameter.
     * @see #getAllowUnregisteredParamaters()
     */
    public void registerParameter(String parameter,
            ValueSanitizer valueSanitizer) {
        updateSanitizer(parameter, valueSanitizer);
    }

    /**
     * Register a value sanitizer for an array of parameters.
     * @param parameters An array of unencoded parameter names.
     * @param valueSanitizer the value sanitizer to use for every listed
     * parameter. May be null in order to unregister those parameters.
     * @see #registerParameter
     */
    public void registerParameters(String[] parameters,
            ValueSanitizer valueSanitizer) {
        for (int i = 0; i < parameters.length; i++) {
            updateSanitizer(parameters[i], valueSanitizer);
        }
    }

    /**
     * Store or drop the sanitizer for one parameter. A null sanitizer
     * removes the registration instead of leaving a null mapping behind.
     */
    private void updateSanitizer(String parameter,
            ValueSanitizer valueSanitizer) {
        if (valueSanitizer == null) {
            mSanitizers.remove(parameter);
        } else {
            mSanitizers.put(parameter, valueSanitizer);
        }
    }

    /**
     * Set whether or not unregistered parameters are allowed. If they
     * are not allowed, then they will be dropped when a query is sanitized.
     * <p>
     * Defaults to false.
     * @param allowUnregisteredParamaters true to allow unregistered parameters.
     * @see #getAllowUnregisteredParamaters()
     */
    public void setAllowUnregisteredParamaters(
            boolean allowUnregisteredParamaters) {
        mAllowUnregisteredParamaters = allowUnregisteredParamaters;
    }

    /**
     * Get whether or not unregistered parameters are allowed. If not
     * allowed, they will be dropped when a query is parsed.
     * @return true if unregistered parameters are allowed.
     * @see #setAllowUnregisteredParamaters(boolean)
     */
    public boolean getAllowUnregisteredParamaters() {
        return mAllowUnregisteredParamaters;
    }

    /**
     * Set whether or not the first occurrence of a repeated parameter is
     * preferred. True means the first repeated parameter is preferred.
     * False means that the last repeated parameter is preferred.
     * <p>
     * The preferred parameter is the one that is returned when getValue
     * is called.
     * <p>
     * defaults to false.
     * @param preferFirstRepeatedParameter True if the first repeated
     * parameter is preferred.
     * @see #getPreferFirstRepeatedParameter()
     */
    public void setPreferFirstRepeatedParameter(
            boolean preferFirstRepeatedParameter) {
        mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
    }

    /**
     * Get whether or not the first occurrence of a repeated parameter is
     * preferred.
     * @return true if the first occurrence of a repeated parameter is
     * preferred.
     * @see #setPreferFirstRepeatedParameter(boolean)
     */
    public boolean getPreferFirstRepeatedParameter() {
        return mPreferFirstRepeatedParameter;
    }

    /**
     * Parse an escaped parameter-value pair. The default implementation
     * unescapes both the parameter and the value, then looks up the
     * effective value sanitizer for the parameter and uses it to sanitize
     * the value. If all goes well then addSanitizedEntry is called with
     * the unescaped parameter and the sanitized unescaped value. If there
     * is no effective value sanitizer the pair is silently dropped.
     * @param parameter an escaped parameter
     * @param value an unsanitized escaped value
     */
    protected void parseEntry(String parameter, String value) {
        String unescapedParameter = unescape(parameter);
        ValueSanitizer valueSanitizer =
                getEffectiveValueSanitizer(unescapedParameter);

        if (valueSanitizer == null) {
            // Unregistered parameter and unregistered parameters are
            // not allowed: drop it.
            return;
        }
        String unescapedValue = unescape(value);
        String sanitizedValue = valueSanitizer.sanitize(unescapedValue);
        addSanitizedEntry(unescapedParameter, sanitizedValue);
    }

    /**
     * Record a sanitized parameter-value pair. Override if you want to
     * do additional filtering or validation.
     * <p>
     * The pair is always appended to the ordered list; the per-parameter
     * value is only overwritten when the last occurrence is preferred.
     * @param parameter an unescaped parameter
     * @param value a sanitized unescaped value
     */
    protected void addSanitizedEntry(String parameter, String value) {
        mEntriesList.add(
                new ParameterValuePair(parameter, value));
        if (mPreferFirstRepeatedParameter && mEntries.containsKey(parameter)) {
            return;
        }
        mEntries.put(parameter, value);
    }

    /**
     * Get the value sanitizer for a parameter. Returns null if there
     * is no value sanitizer registered for the parameter.
     * @param parameter the unescaped parameter
     * @return the currently registered value sanitizer for this parameter.
     * @see #registerParameter(String, UrlQuerySanitizer.ValueSanitizer)
     */
    public ValueSanitizer getValueSanitizer(String parameter) {
        return mSanitizers.get(parameter);
    }

    /**
     * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
     * except if there is no value sanitizer registered for a parameter, and
     * unregistered parameters are allowed, then the default value sanitizer is
     * returned.
     * @param parameter an unescaped parameter
     * @return the effective value sanitizer for a parameter, or null if
     * the parameter is unregistered and unregistered parameters are not
     * allowed.
     */
    public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
        ValueSanitizer sanitizer = getValueSanitizer(parameter);
        if (sanitizer == null && mAllowUnregisteredParamaters) {
            sanitizer = getUnregisteredParameterValueSanitizer();
        }
        return sanitizer;
    }

    /**
     * Unescape an escaped string.
     * <ul>
     * <li>'+' characters are replaced by
     * ' ' characters.
     * <li>Valid "%xx" escape sequences are replaced by the
     * corresponding unescaped character. Each sequence is decoded on its
     * own to the character with that code (0 to 255); multi-byte
     * encodings are not reassembled.
     * <li>Invalid escape sequences such as "%1z", are passed through unchanged.
     * </ul>
     * @param string the escaped string
     * @return the unescaped string.
     */
    public String unescape(String string) {
        // Locate the first character that needs decoding. Both '+' and '%'
        // must be considered: starting at the first '%' alone would copy an
        // earlier '+' through without turning it into a space.
        int firstPercent = string.indexOf('%');
        int firstPlus = string.indexOf('+');
        int firstEscape;
        if (firstPercent < 0) {
            firstEscape = firstPlus;
        } else if (firstPlus < 0) {
            firstEscape = firstPercent;
        } else {
            firstEscape = Math.min(firstPercent, firstPlus);
        }
        // Early exit if no escaped characters.
        if (firstEscape < 0) {
            return string;
        }

        int length = string.length();

        StringBuilder stringBuilder = new StringBuilder(length);
        stringBuilder.append(string, 0, firstEscape);
        for (int i = firstEscape; i < length; i++) {
            char c = string.charAt(i);
            if (c == '+') {
                c = ' ';
            } else if (c == '%' && i + 2 < length) {
                char c1 = string.charAt(i + 1);
                char c2 = string.charAt(i + 2);
                if (isHexDigit(c1) && isHexDigit(c2)) {
                    c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
                    // Skip the two hex digits just consumed.
                    i += 2;
                }
            }
            stringBuilder.append(c);
        }
        return stringBuilder.toString();
    }

    /**
     * Test if a character is a hexadecimal digit. Both upper case and lower
     * case hex digits are allowed.
     * @param c the character to test
     * @return true if c is a hex digit.
     */
    protected boolean isHexDigit(char c) {
        return decodeHexDigit(c) >= 0;
    }

    /**
     * Convert a character that represents a hexadecimal digit into an integer.
     * If the character is not a hexadecimal digit, then -1 is returned.
     * Both upper case and lower case hex digits are allowed.
     * @param c the hexadecimal digit.
     * @return the integer value of the hexadecimal digit.
     */
    protected int decodeHexDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        } else if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        } else if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        } else {
            return -1;
        }
    }

    /**
     * Clear the existing entries. Called to get ready to parse a new
     * query string.
     */
    protected void clear() {
        mEntries.clear();
        mEntriesList.clear();
    }
}
914