Chiao Cheng | ecba27e | 2012-12-27 17:14:53 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2012 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | package com.android.contacts.common.util; |
| 18 | |
| 19 | import com.google.common.annotations.VisibleForTesting; |
| 20 | |
| 21 | /** |
| 22 | * Methods related to search. |
| 23 | */ |
| 24 | public class SearchUtil { |
| 25 | |
| 26 | public static class MatchedLine { |
| 27 | |
| 28 | public int startIndex = -1; |
| 29 | public String line; |
| 30 | |
| 31 | @Override |
| 32 | public String toString() { |
| 33 | return "MatchedLine{" + |
| 34 | "line='" + line + '\'' + |
| 35 | ", startIndex=" + startIndex + |
| 36 | '}'; |
| 37 | } |
| 38 | } |
| 39 | |
| 40 | /** |
| 41 | * Given a string with lines delimited with '\n', finds the matching line to the given |
| 42 | * substring. |
| 43 | * |
| 44 | * @param contents The string to search. |
| 45 | * @param substring The substring to search for. |
| 46 | * @return A MatchedLine object containing the matching line and the startIndex of the substring |
| 47 | * match within that line. |
| 48 | */ |
| 49 | public static MatchedLine findMatchingLine(String contents, String substring) { |
| 50 | final MatchedLine matched = new MatchedLine(); |
| 51 | |
| 52 | // Snippet may contain multiple lines separated by "\n". |
| 53 | // Locate the lines of the content that contain the substring. |
| 54 | final int index = SearchUtil.contains(contents, substring); |
| 55 | if (index != -1) { |
| 56 | // Match found. Find the corresponding line. |
| 57 | int start = index - 1; |
| 58 | while (start > -1) { |
| 59 | if (contents.charAt(start) == '\n') { |
| 60 | break; |
| 61 | } |
| 62 | start--; |
| 63 | } |
| 64 | int end = index + 1; |
| 65 | while (end < contents.length()) { |
| 66 | if (contents.charAt(end) == '\n') { |
| 67 | break; |
| 68 | } |
| 69 | end++; |
| 70 | } |
| 71 | matched.line = contents.substring(start + 1, end); |
| 72 | matched.startIndex = index - (start + 1); |
| 73 | } |
| 74 | return matched; |
| 75 | } |
| 76 | |
| 77 | /** |
| 78 | * Similar to String.contains() with two main differences: |
| 79 | * <p> |
| 80 | * 1) Only searches token prefixes. A token is defined as any combination of letters or |
| 81 | * numbers. |
| 82 | * <p> |
| 83 | * 2) Returns the starting index where the substring is found. |
| 84 | * |
| 85 | * @param value The string to search. |
| 86 | * @param substring The substring to look for. |
| 87 | * @return The starting index where the substring is found. {@literal -1} if substring is not |
| 88 | * found in value. |
| 89 | */ |
| 90 | @VisibleForTesting |
| 91 | static int contains(String value, String substring) { |
| 92 | if (value.length() < substring.length()) { |
| 93 | return -1; |
| 94 | } |
| 95 | |
| 96 | // i18n support |
| 97 | // Generate the code points for the substring once. |
| 98 | // There will be a maximum of substring.length code points. But may be fewer. |
| 99 | // Since the array length is not an accurate size, we need to keep a separate variable. |
| 100 | final int[] substringCodePoints = new int[substring.length()]; |
| 101 | int substringLength = 0; // may not equal substring.length()!! |
| 102 | for (int i = 0; i < substring.length(); ) { |
| 103 | final int codePoint = Character.codePointAt(substring, i); |
| 104 | substringCodePoints[substringLength] = codePoint; |
| 105 | substringLength++; |
| 106 | i += Character.charCount(codePoint); |
| 107 | } |
| 108 | |
Chiao Cheng | ecba27e | 2012-12-27 17:14:53 -0800 | [diff] [blame] | 109 | for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) { |
Chiao Cheng | ecba27e | 2012-12-27 17:14:53 -0800 | [diff] [blame] | 110 | int numMatch = 0; |
Jay Shrauner | 6ae8b19 | 2013-01-09 11:53:46 -0800 | [diff] [blame] | 111 | for (int j = i; j < value.length() && numMatch < substringLength; ++numMatch) { |
| 112 | int valueCp = Character.toLowerCase(value.codePointAt(j)); |
| 113 | int substringCp = substringCodePoints[numMatch]; |
| 114 | if (valueCp != substringCp) { |
| 115 | break; |
Chiao Cheng | ecba27e | 2012-12-27 17:14:53 -0800 | [diff] [blame] | 116 | } |
Jay Shrauner | 6ae8b19 | 2013-01-09 11:53:46 -0800 | [diff] [blame] | 117 | j += Character.charCount(valueCp); |
Chiao Cheng | ecba27e | 2012-12-27 17:14:53 -0800 | [diff] [blame] | 118 | } |
Jay Shrauner | 6ae8b19 | 2013-01-09 11:53:46 -0800 | [diff] [blame] | 119 | if (numMatch == substringLength) { |
| 120 | return i; |
| 121 | } |
Chiao Cheng | ecba27e | 2012-12-27 17:14:53 -0800 | [diff] [blame] | 122 | } |
| 123 | return -1; |
| 124 | } |
| 125 | |
| 126 | /** |
| 127 | * Find the start of the next token. A token is composed of letters and numbers. Any other |
| 128 | * character are considered delimiters. |
| 129 | * |
| 130 | * @param line The string to search for the next token. |
| 131 | * @param startIndex The index to start searching. 0 based indexing. |
| 132 | * @return The index for the start of the next token. line.length() if next token not found. |
| 133 | */ |
| 134 | @VisibleForTesting |
| 135 | static int findNextTokenStart(String line, int startIndex) { |
| 136 | int index = startIndex; |
| 137 | |
| 138 | // If already in token, eat remainder of token. |
| 139 | while (index <= line.length()) { |
| 140 | if (index == line.length()) { |
| 141 | // No more tokens. |
| 142 | return index; |
| 143 | } |
| 144 | final int codePoint = line.codePointAt(index); |
| 145 | if (!Character.isLetterOrDigit(codePoint)) { |
| 146 | break; |
| 147 | } |
| 148 | index += Character.charCount(codePoint); |
| 149 | } |
| 150 | |
| 151 | // Out of token, eat all consecutive delimiters. |
| 152 | while (index <= line.length()) { |
| 153 | if (index == line.length()) { |
| 154 | return index; |
| 155 | } |
| 156 | final int codePoint = line.codePointAt(index); |
| 157 | if (Character.isLetterOrDigit(codePoint)) { |
| 158 | break; |
| 159 | } |
| 160 | index += Character.charCount(codePoint); |
| 161 | } |
| 162 | |
| 163 | return index; |
| 164 | } |
| 165 | |
| 166 | /** |
| 167 | * Anything other than letter and numbers are considered delimiters. Remove start and end |
| 168 | * delimiters since they are not relevant to search. |
| 169 | * |
| 170 | * @param query The query string to clean. |
| 171 | * @return The cleaned query. Empty string if all characters are cleaned out. |
| 172 | */ |
| 173 | public static String cleanStartAndEndOfSearchQuery(String query) { |
| 174 | int start = 0; |
| 175 | while (start < query.length()) { |
| 176 | int codePoint = query.codePointAt(start); |
| 177 | if (Character.isLetterOrDigit(codePoint)) { |
| 178 | break; |
| 179 | } |
| 180 | start += Character.charCount(codePoint); |
| 181 | } |
| 182 | |
| 183 | if (start == query.length()) { |
| 184 | // All characters are delimiters. |
| 185 | return ""; |
| 186 | } |
| 187 | |
| 188 | int end = query.length() - 1; |
| 189 | while (end > -1) { |
| 190 | if (Character.isLowSurrogate(query.charAt(end))) { |
| 191 | // Assume valid i18n string. There should be a matching high surrogate before it. |
| 192 | end--; |
| 193 | } |
| 194 | int codePoint = query.codePointAt(end); |
| 195 | if (Character.isLetterOrDigit(codePoint)) { |
| 196 | break; |
| 197 | } |
| 198 | end--; |
| 199 | } |
| 200 | |
| 201 | // end is a letter or digit. |
| 202 | return query.substring(start, end + 1); |
| 203 | } |
| 204 | } |