/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package android.net;
18
19import java.util.ArrayList;
20import java.util.HashMap;
21import java.util.List;
Elliott Hughescb64d432013-08-02 10:00:44 -070022import java.util.Locale;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080023import java.util.Set;
24import java.util.StringTokenizer;
Chalard Jean9e50b982019-04-19 14:58:51 +090025import java.util.regex.Matcher;
26import java.util.regex.Pattern;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080027
/**
 *
 * Sanitizes the Query portion of a URL. Simple example:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.setAllowUnregisteredParamaters(true);
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe_User"
 * </code>
 *
 * Register ValueSanitizers to customize the way individual
 * parameters are sanitized:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe User". (The string is first decoded, which
 * // converts the '+' to a ' '. Then the string is sanitized; the registered
 * // sanitizer allows spaces, so the ' ' is kept. In the first example the
 * // ' ' is converted to an '_' because the default unregistered parameter
 * // sanitizer does not allow any special characters, and ' ' is a special
 * // character.)
 * </code>
 *
 * There are several ways to create ValueSanitizers. In order of increasing
 * sophistication:
 * <ol>
 * <li>Call one of the UrlQuerySanitizer.getXXX() methods, such as
 * UrlQuerySanitizer.getSpaceLegal().
 * <li>Construct your own instance of
 * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
 * <li>Implement UrlQuerySanitizer.ValueSanitizer to define your own value
 * sanitizer.
 * </ol>
 *
 */
public class UrlQuerySanitizer {

    /**
     * A simple mutable tuple that holds one parameter-value pair.
     */
    public class ParameterValuePair {
        /**
         * The unencoded parameter.
         */
        public String mParameter;
        /**
         * The unencoded value.
         */
        public String mValue;

        /**
         * Construct a parameter-value tuple.
         * @param parameter an unencoded parameter
         * @param value an unencoded value
         */
        public ParameterValuePair(String parameter, String value) {
            mParameter = parameter;
            mValue = value;
        }
    }

    // Sanitizers registered per (unencoded) parameter name.
    private final HashMap<String, ValueSanitizer> mSanitizers =
            new HashMap<String, ValueSanitizer>();
    // Preferred sanitized value per parameter name for the most recently parsed query.
    private final HashMap<String, String> mEntries =
            new HashMap<String, String>();
    // Every accepted parameter-value pair of the most recently parsed query, in query order.
    private final ArrayList<ParameterValuePair> mEntriesList =
            new ArrayList<ParameterValuePair>();
    private boolean mAllowUnregisteredParamaters;
    private boolean mPreferFirstRepeatedParameter;
    private ValueSanitizer mUnregisteredParameterValueSanitizer =
            getAllIllegal();

    /**
     * A functor used to sanitize a single query value.
     */
    public static interface ValueSanitizer {
        /**
         * Sanitize an unencoded value.
         * @param value the unencoded value to sanitize
         * @return the sanitized unencoded value
         */
        public String sanitize(String value);
    }

    /**
     * Sanitize values based on which characters they contain. Illegal
     * characters are replaced with either space or '_', depending upon
     * whether space is a legal character or not.
     */
    public static class IllegalCharacterValueSanitizer implements
            ValueSanitizer {
        private int mFlags;

        /**
         * Allow space (' ') characters.
         */
        public final static int SPACE_OK = 1 << 0;
        /**
         * Allow whitespace characters other than space. The other whitespace
         * characters are tab, form feed, line feed, carriage return and
         * vertical tab (character code 11).
         */
        public final static int OTHER_WHITESPACE_OK = 1 << 1;
        /**
         * Allow characters with character codes of 128 and above.
         */
        public final static int NON_7_BIT_ASCII_OK = 1 << 2;
        /**
         * Allow double quote characters.
         */
        public final static int DQUOTE_OK = 1 << 3;
        /**
         * Allow single quote characters.
         */
        public final static int SQUOTE_OK = 1 << 4;
        /**
         * Allow less-than characters. ({@code '<'})
         */
        public final static int LT_OK = 1 << 5;
        /**
         * Allow greater-than characters. ({@code '>'})
         */
        public final static int GT_OK = 1 << 6;
        /**
         * Allow ampersand characters. ({@code '&'})
         */
        public final static int AMP_OK = 1 << 7;
        /**
         * Allow percent-sign characters. ('%')
         */
        public final static int PCT_OK = 1 << 8;
        /**
         * Allow nul characters. (character code 0)
         */
        public final static int NUL_OK = 1 << 9;
        /**
         * Allow text to start with a script URL
         * such as "javascript:" or "vbscript:"
         */
        public final static int SCRIPT_URL_OK = 1 << 10;

        /**
         * Mask with all fields set to OK
         */
        public final static int ALL_OK = 0x7ff;

        /**
         * Mask with both regular space and other whitespace OK
         */
        public final static int ALL_WHITESPACE_OK =
                SPACE_OK | OTHER_WHITESPACE_OK;


        // Common flag combinations:

        /**
         * <ul>
         * <li>Deny all special characters.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int ALL_ILLEGAL =
                0;
        /**
         * <ul>
         * <li>Allow all special characters except Nul (character code 0).
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_NUL_LEGAL =
                ALL_OK & ~NUL_OK;
        /**
         * <ul>
         * <li>Allow all special characters except for:
         * <ul>
         *  <li>whitespace characters
         *  <li>Nul (character code 0)
         * </ul>
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_WHITESPACE_LEGAL =
                ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_LEGAL =
                NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Allow spaces.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_AND_SPACE_LEGAL =
                URL_LEGAL | SPACE_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_LEGAL =
                AMP_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_AND_SPACE_LEGAL =
                AMP_OK | SPACE_OK;
        /**
         * <ul>
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int SPACE_LEGAL =
                SPACE_OK;
        /**
         * <ul>
         * <li>Allow all but.
         * <ul>
         *  <li>Nul (character code 0)
         *  <li>Angle brackets ({@code '<'}, {@code '>'})
         * </ul>
         * <li>Allow script URLs (the mask keeps {@link #SCRIPT_URL_OK} set).
         * </ul>
         */
        public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
                ALL_OK & ~(NUL_OK | LT_OK | GT_OK);

        // Script URL definitions

        private final static String JAVASCRIPT_PREFIX = "javascript:";

        private final static String VBSCRIPT_PREFIX = "vbscript:";

        private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
                JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());

        /**
         * Construct a sanitizer. The parameters set the behavior of the
         * sanitizer.
         * @param flags some combination of the XXX_OK flags.
         */
        public IllegalCharacterValueSanitizer(
                int flags) {
            mFlags = flags;
        }

        /**
         * Sanitize a value.
         * <ol>
         * <li>If script URLs are not OK and the value, ignoring leading
         * white space, starts with a script URL prefix, the whole value is
         * replaced by the empty string.
         * <li>If neither spaces nor other white space is OK, then
         * white space will be trimmed from the beginning and end of
         * the URL. (Just the actual white space characters are trimmed, not
         * other control codes.)
         * <li> Illegal characters will be replaced with
         * either ' ' or '_', depending on whether a space is itself a
         * legal character.
         * </ol>
         * @param value the unencoded value to sanitize, may be null
         * @return the sanitized value, or null if value was null
         */
        @Override
        public String sanitize(String value) {
            if (value == null) {
                return null;
            }
            if ((mFlags & SCRIPT_URL_OK) == 0 && startsWithScriptPrefix(value)) {
                return "";
            }

            // Surrounding whitespace is only stripped from the result when no
            // kind of whitespace is legal.
            if ((mFlags & ALL_WHITESPACE_OK) == 0) {
                value = trimWhitespace(value);
            }

            final char replacement = (mFlags & SPACE_OK) != 0 ? ' ' : '_';
            final int length = value.length();
            StringBuilder sanitized = new StringBuilder(length);
            for (int i = 0; i < length; i++) {
                char c = value.charAt(i);
                sanitized.append(characterIsLegal(c) ? c : replacement);
            }
            return sanitized.toString();
        }

        /**
         * Check whether a value is a script URL. The check is done on the
         * value with surrounding white space ignored: testing the raw value
         * would let " javascript:..." through, and the later trimming (or
         * replacement of other white space by spaces) would then produce a
         * result that is a script URL again.
         * @param value the non-null value to test
         * @return true if the value starts with a script URL prefix
         */
        private static boolean startsWithScriptPrefix(String value) {
            String candidate = trimWhitespace(value);
            if (candidate.length() < MIN_SCRIPT_PREFIX_LENGTH) {
                return false;
            }
            String asLower = candidate.toLowerCase(Locale.ROOT);
            return asLower.startsWith(JAVASCRIPT_PREFIX)
                    || asLower.startsWith(VBSCRIPT_PREFIX);
        }

        /**
         * Trim whitespace from the beginning and end of a string.
         * <p>
         * Note: can't use {@link String#trim} because {@link String#trim} has a
         * different definition of whitespace than we want.
         * @param value the string to trim
         * @return the trimmed string; the same instance if nothing was trimmed
         */
        private static String trimWhitespace(String value) {
            final int last = value.length() - 1;
            int start = 0;
            int end = last;
            while (start <= end && isWhitespace(value.charAt(start))) {
                start++;
            }
            while (end >= start && isWhitespace(value.charAt(end))) {
                end--;
            }
            if (start == 0 && end == last) {
                return value;
            }
            return value.substring(start, end + 1);
        }

        /**
         * Check if c is whitespace.
         * @param c character to test
         * @return true if c is a whitespace character
         */
        private static boolean isWhitespace(char c) {
            switch (c) {
                case ' ':
                case '\t':
                case '\f':
                case '\n':
                case '\r':
                case 11: /* VT */
                    return true;
                default:
                    return false;
            }
        }

        /**
         * Check whether an individual character is legal. Uses the
         * flag bit-set passed into the constructor. Characters without a
         * dedicated flag are legal when they are in the range 32 to 126, or
         * when they are 128 or above and {@link #NON_7_BIT_ASCII_OK} is set.
         * @param c the character to test
         * @return true if c is a legal character
         */
        private boolean characterIsLegal(char c) {
            switch (c) {
                case ' ' : return (mFlags & SPACE_OK) != 0;
                case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
                    return (mFlags & OTHER_WHITESPACE_OK) != 0;
                case '\"': return (mFlags & DQUOTE_OK) != 0;
                case '\'': return (mFlags & SQUOTE_OK) != 0;
                case '<' : return (mFlags & LT_OK) != 0;
                case '>' : return (mFlags & GT_OK) != 0;
                case '&' : return (mFlags & AMP_OK) != 0;
                case '%' : return (mFlags & PCT_OK) != 0;
                case '\0': return (mFlags & NUL_OK) != 0;
                default  : return (c >= 32 && c < 127) ||
                        ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
            }
        }
    }

    /**
     * Get the current value sanitizer used when processing
     * unregistered parameter values.
     * <p>
     * <b>Note:</b> The default unregistered parameter value sanitizer is
     * the one returned by {@link #getAllIllegal()}, which doesn't allow any
     * special characters.
     *
     * @return the current ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public ValueSanitizer getUnregisteredParameterValueSanitizer() {
        return mUnregisteredParameterValueSanitizer;
    }

    /**
     * Set the value sanitizer used when processing unregistered
     * parameter values.
     * @param sanitizer set the ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public void setUnregisteredParameterValueSanitizer(
            ValueSanitizer sanitizer) {
        mUnregisteredParameterValueSanitizer = sanitizer;
    }


    // Private fields for singleton sanitizers:

    private static final ValueSanitizer sAllIllegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.ALL_ILLEGAL);

    private static final ValueSanitizer sAllButNulLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);

    private static final ValueSanitizer sAllButWhitespaceLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);

    private static final ValueSanitizer sURLLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.URL_LEGAL);

    private static final ValueSanitizer sUrlAndSpaceLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);

    private static final ValueSanitizer sAmpLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.AMP_LEGAL);

    private static final ValueSanitizer sAmpAndSpaceLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);

    private static final ValueSanitizer sSpaceLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.SPACE_LEGAL);

    private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);

    /**
     * Return a value sanitizer that does not allow any special characters,
     * and also does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllIllegal() {
        return sAllIllegal;
    }

    /**
     * Return a value sanitizer that allows everything except Nul
     * (character code 0) characters. Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulLegal() {
        return sAllButNulLegal;
    }

    /**
     * Return a value sanitizer that allows everything except Nul
     * (character code 0) characters, space (' '), and other whitespace
     * characters. Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButWhitespaceLegal() {
        return sAllButWhitespaceLegal;
    }

    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs. Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlLegal() {
        return sURLLegal;
    }

    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs and allows spaces, which are not technically legal
     * in encoded URLs, but commonly appear anyway.
     * Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlAndSpaceLegal() {
        return sUrlAndSpaceLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ({@code '&'}). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpLegal() {
        return sAmpLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ({@code '&'}) and space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpAndSpaceLegal() {
        return sAmpAndSpaceLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getSpaceLegal() {
        return sSpaceLegal;
    }

    /**
     * Return a value sanitizer that allows any special characters
     * except angle brackets ({@code '<'} and {@code '>'}) and Nul
     * (character code 0). Allows script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
        return sAllButNulAndAngleBracketsLegal;
    }

    /**
     * Constructs a UrlQuerySanitizer.
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters are not allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     */
    public UrlQuerySanitizer() {
    }

    /**
     * Constructs a UrlQuerySanitizer and parses a URL.
     * This constructor is provided for convenience when the
     * default parsing behavior is acceptable.
     * <p>
     * Because the URL is parsed before the constructor returns, there isn't
     * a chance to configure the sanitizer to change the parsing behavior.
     * <p>
     * <code>
     * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
     * String name = sanitizer.getValue("name");
     * </code>
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters <em>are</em> allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     * @param url the encoded URL to parse.
     */
    public UrlQuerySanitizer(String url) {
        setAllowUnregisteredParamaters(true);
        parseUrl(url);
    }

    /**
     * Parse the query parameters out of an encoded URL.
     * Works by extracting the query portion from the URL (everything after
     * the first '?') and then calling parseQuery(). If there is no query
     * portion it is treated as if the query portion is an empty string.
     * @param url the encoded URL to parse.
     */
    public void parseUrl(String url) {
        int queryIndex = url.indexOf('?');
        String query = (queryIndex >= 0) ? url.substring(queryIndex + 1) : "";
        parseQuery(query);
    }

    /**
     * Parse a query. A query string is any number of parameter-value clauses
     * separated by any non-zero number of ampersands. A parameter-value clause
     * is a parameter followed by an equal sign, followed by a value. If the
     * equal sign is missing, the value is assumed to be the empty string.
     * Any entries from a previously parsed query are discarded first.
     * @param query the query to parse.
     */
    public void parseQuery(String query) {
        clear();
        // Split by '&'
        StringTokenizer tokenizer = new StringTokenizer(query, "&");
        while (tokenizer.hasMoreTokens()) {
            String clause = tokenizer.nextToken();
            if (clause.isEmpty()) {
                continue;
            }
            int assignmentIndex = clause.indexOf('=');
            if (assignmentIndex < 0) {
                // No assignment found, treat as if empty value
                parseEntry(clause, "");
            } else {
                parseEntry(clause.substring(0, assignmentIndex),
                        clause.substring(assignmentIndex + 1));
            }
        }
    }

    /**
     * Get a set of all of the parameters found in the sanitized query.
     * <p>
     * Note: Do not modify this set. Treat it as a read-only set.
     * @return all the parameters found in the current query.
     */
    public Set<String> getParameterSet() {
        return mEntries.keySet();
    }

    /**
     * An array list of all of the parameter-value pairs in the sanitized
     * query, in the order they appeared in the query. May contain duplicate
     * parameters.
     * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p>
     * @return the parameter-value pairs of the current query.
     */
    public List<ParameterValuePair> getParameterList() {
        return mEntriesList;
    }

    /**
     * Check if a parameter exists in the current sanitized query.
     * @param parameter the unencoded name of a parameter.
     * @return true if the parameter exists in the current sanitized query.
     */
    public boolean hasParameter(String parameter) {
        return mEntries.containsKey(parameter);
    }

    /**
     * Get the value for a parameter in the current sanitized query.
     * Returns null if the parameter does not exist.
     * @param parameter the unencoded name of a parameter.
     * @return the sanitized unencoded value of the parameter,
     * or null if the parameter does not exist.
     */
    public String getValue(String parameter) {
        return mEntries.get(parameter);
    }

    /**
     * Register a value sanitizer for a particular parameter. Can also be used
     * to replace or remove an already-set value sanitizer.
     * <p>
     * Registering a non-null value sanitizer for a particular parameter
     * makes that parameter a registered parameter.
     * @param parameter an unencoded parameter name
     * @param valueSanitizer the value sanitizer to use for a particular
     * parameter. May be null in order to unregister that parameter.
     * @see #getAllowUnregisteredParamaters()
     */
    public void registerParameter(String parameter,
            ValueSanitizer valueSanitizer) {
        putOrRemoveSanitizer(parameter, valueSanitizer);
    }

    /**
     * Register a value sanitizer for an array of parameters.
     * @param parameters An array of unencoded parameter names.
     * @param valueSanitizer the value sanitizer to use for each of the
     * parameters. May be null in order to unregister those parameters.
     * @see #registerParameter
     */
    public void registerParameters(String[] parameters,
            ValueSanitizer valueSanitizer) {
        for (String parameter : parameters) {
            putOrRemoveSanitizer(parameter, valueSanitizer);
        }
    }

    /**
     * Store a sanitizer for a parameter, or drop the mapping entirely when
     * the sanitizer is null so that no null entry is left behind in the map.
     */
    private void putOrRemoveSanitizer(String parameter,
            ValueSanitizer valueSanitizer) {
        if (valueSanitizer == null) {
            mSanitizers.remove(parameter);
        } else {
            mSanitizers.put(parameter, valueSanitizer);
        }
    }

    /**
     * Set whether or not unregistered parameters are allowed. If they
     * are not allowed, then they will be dropped when a query is sanitized.
     * <p>
     * Defaults to false.
     * @param allowUnregisteredParamaters true to allow unregistered parameters.
     * @see #getAllowUnregisteredParamaters()
     */
    public void setAllowUnregisteredParamaters(
            boolean allowUnregisteredParamaters) {
        mAllowUnregisteredParamaters = allowUnregisteredParamaters;
    }

    /**
     * Get whether or not unregistered parameters are allowed. If not
     * allowed, they will be dropped when a query is parsed.
     * @return true if unregistered parameters are allowed.
     * @see #setAllowUnregisteredParamaters(boolean)
     */
    public boolean getAllowUnregisteredParamaters() {
        return mAllowUnregisteredParamaters;
    }

    /**
     * Set whether or not the first occurrence of a repeated parameter is
     * preferred. True means the first repeated parameter is preferred.
     * False means that the last repeated parameter is preferred.
     * <p>
     * The preferred parameter is the one that is returned when
     * {@link #getValue(String)} is called.
     * <p>
     * Defaults to false.
     * @param preferFirstRepeatedParameter True if the first repeated
     * parameter is preferred.
     * @see #getPreferFirstRepeatedParameter()
     */
    public void setPreferFirstRepeatedParameter(
            boolean preferFirstRepeatedParameter) {
        mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
    }

    /**
     * Get whether or not the first occurrence of a repeated parameter is
     * preferred.
     * @return true if the first occurrence of a repeated parameter is
     * preferred.
     * @see #setPreferFirstRepeatedParameter(boolean)
     */
    public boolean getPreferFirstRepeatedParameter() {
        return mPreferFirstRepeatedParameter;
    }

    /**
     * Parse an escaped parameter-value pair. The default implementation
     * unescapes the parameter, then looks up the effective value sanitizer
     * for it. If there is none the pair is dropped; otherwise the value is
     * unescaped and sanitized, and addSanitizedEntry is called with
     * the unescaped parameter and the sanitized unescaped value.
     * @param parameter an escaped parameter
     * @param value an unsanitized escaped value
     */
    protected void parseEntry(String parameter, String value) {
        String unescapedParameter = unescape(parameter);
        ValueSanitizer valueSanitizer =
                getEffectiveValueSanitizer(unescapedParameter);
        if (valueSanitizer == null) {
            // Unregistered parameter and unregistered parameters are not allowed.
            return;
        }
        String sanitizedValue = valueSanitizer.sanitize(unescape(value));
        addSanitizedEntry(unescapedParameter, sanitizedValue);
    }

    /**
     * Record a sanitized parameter-value pair. Override if you want to
     * do additional filtering or validation.
     * <p>
     * The pair is always appended to the parameter list. The value lookup
     * map keeps the first or the last value of a repeated parameter,
     * depending on {@link #getPreferFirstRepeatedParameter()}.
     * @param parameter an unescaped parameter
     * @param value a sanitized unescaped value
     */
    protected void addSanitizedEntry(String parameter, String value) {
        mEntriesList.add(
                new ParameterValuePair(parameter, value));
        if (mPreferFirstRepeatedParameter && mEntries.containsKey(parameter)) {
            return;
        }
        mEntries.put(parameter, value);
    }

    /**
     * Get the value sanitizer for a parameter. Returns null if there
     * is no value sanitizer registered for the parameter.
     * @param parameter the unescaped parameter
     * @return the currently registered value sanitizer for this parameter.
     * @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer)
     */
    public ValueSanitizer getValueSanitizer(String parameter) {
        return mSanitizers.get(parameter);
    }

    /**
     * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
     * except if there is no value sanitizer registered for a parameter, and
     * unregistered parameters are allowed, then the default value sanitizer is
     * returned.
     * @param parameter an unescaped parameter
     * @return the effective value sanitizer for a parameter, or null if the
     * parameter is unregistered and unregistered parameters are not allowed.
     */
    public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
        ValueSanitizer sanitizer = getValueSanitizer(parameter);
        if (sanitizer == null && mAllowUnregisteredParamaters) {
            sanitizer = getUnregisteredParameterValueSanitizer();
        }
        return sanitizer;
    }

    // Matches the first character that makes unescaping necessary.
    private static final Pattern plusOrPercent = Pattern.compile("[+%]");

    /**
     * Unescape an escaped string.
     * <ul>
     * <li>'+' characters are replaced by
     * ' ' characters.
     * <li>Valid "%xx" escape sequences are replaced by the
     * corresponding unescaped character.
     * <li>Invalid escape sequences such as "%1z", are passed through unchanged.
     * </ul>
     * @param string the escaped string
     * @return the unescaped string.
     */
    public String unescape(String string) {
        final Matcher matcher = plusOrPercent.matcher(string);
        if (!matcher.find()) {
            // Nothing to unescape: hand back the input without copying it.
            return string;
        }
        final int firstEscape = matcher.start();
        final int length = string.length();

        StringBuilder unescaped = new StringBuilder(length);
        unescaped.append(string, 0, firstEscape);
        for (int i = firstEscape; i < length; i++) {
            char c = string.charAt(i);
            if (c == '+') {
                c = ' ';
            } else if (c == '%' && i + 2 < length) {
                char c1 = string.charAt(i + 1);
                char c2 = string.charAt(i + 2);
                if (isHexDigit(c1) && isHexDigit(c2)) {
                    c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
                    // Skip the two hex digits that were just consumed.
                    i += 2;
                }
            }
            unescaped.append(c);
        }
        return unescaped.toString();
    }

    /**
     * Test if a character is a hexadecimal digit. Both upper case and lower
     * case hex digits are allowed.
     * @param c the character to test
     * @return true if c is a hex digit.
     */
    protected boolean isHexDigit(char c) {
        return decodeHexDigit(c) >= 0;
    }

    /**
     * Convert a character that represents a hexadecimal digit into an integer.
     * If the character is not a hexadecimal digit, then -1 is returned.
     * Both upper case and lower case hex digits are allowed.
     * @param c the hexadecimal digit.
     * @return the integer value of the hexadecimal digit.
     */
    protected int decodeHexDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        return -1;
    }

    /**
     * Clear the existing entries. Called to get ready to parse a new
     * query string.
     */
    protected void clear() {
        mEntries.clear();
        mEntriesList.clear();
    }
}
911