blob: d80fc53c3f4b73d785a4e59e3db763802896e441 [file] [log] [blame]
Chiao Chengecba27e2012-12-27 17:14:53 -08001/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Gary Mai69c182a2016-12-05 13:07:03 -080017package com.android.contacts.util;
Chiao Chengecba27e2012-12-27 17:14:53 -080018
19import com.google.common.annotations.VisibleForTesting;
20
21/**
22 * Methods related to search.
23 */
24public class SearchUtil {
25
26 public static class MatchedLine {
27
28 public int startIndex = -1;
29 public String line;
30
31 @Override
32 public String toString() {
33 return "MatchedLine{" +
34 "line='" + line + '\'' +
35 ", startIndex=" + startIndex +
36 '}';
37 }
38 }
39
40 /**
41 * Given a string with lines delimited with '\n', finds the matching line to the given
42 * substring.
43 *
44 * @param contents The string to search.
45 * @param substring The substring to search for.
46 * @return A MatchedLine object containing the matching line and the startIndex of the substring
47 * match within that line.
48 */
49 public static MatchedLine findMatchingLine(String contents, String substring) {
50 final MatchedLine matched = new MatchedLine();
51
52 // Snippet may contain multiple lines separated by "\n".
53 // Locate the lines of the content that contain the substring.
54 final int index = SearchUtil.contains(contents, substring);
55 if (index != -1) {
56 // Match found. Find the corresponding line.
57 int start = index - 1;
58 while (start > -1) {
59 if (contents.charAt(start) == '\n') {
60 break;
61 }
62 start--;
63 }
64 int end = index + 1;
65 while (end < contents.length()) {
66 if (contents.charAt(end) == '\n') {
67 break;
68 }
69 end++;
70 }
71 matched.line = contents.substring(start + 1, end);
72 matched.startIndex = index - (start + 1);
73 }
74 return matched;
75 }
76
77 /**
78 * Similar to String.contains() with two main differences:
79 * <p>
80 * 1) Only searches token prefixes. A token is defined as any combination of letters or
81 * numbers.
82 * <p>
83 * 2) Returns the starting index where the substring is found.
84 *
85 * @param value The string to search.
86 * @param substring The substring to look for.
87 * @return The starting index where the substring is found. {@literal -1} if substring is not
88 * found in value.
89 */
90 @VisibleForTesting
91 static int contains(String value, String substring) {
92 if (value.length() < substring.length()) {
93 return -1;
94 }
95
96 // i18n support
97 // Generate the code points for the substring once.
98 // There will be a maximum of substring.length code points. But may be fewer.
99 // Since the array length is not an accurate size, we need to keep a separate variable.
100 final int[] substringCodePoints = new int[substring.length()];
101 int substringLength = 0; // may not equal substring.length()!!
102 for (int i = 0; i < substring.length(); ) {
103 final int codePoint = Character.codePointAt(substring, i);
104 substringCodePoints[substringLength] = codePoint;
105 substringLength++;
106 i += Character.charCount(codePoint);
107 }
108
Chiao Chengecba27e2012-12-27 17:14:53 -0800109 for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) {
Chiao Chengecba27e2012-12-27 17:14:53 -0800110 int numMatch = 0;
Jay Shrauner6ae8b192013-01-09 11:53:46 -0800111 for (int j = i; j < value.length() && numMatch < substringLength; ++numMatch) {
112 int valueCp = Character.toLowerCase(value.codePointAt(j));
113 int substringCp = substringCodePoints[numMatch];
114 if (valueCp != substringCp) {
115 break;
Chiao Chengecba27e2012-12-27 17:14:53 -0800116 }
Jay Shrauner6ae8b192013-01-09 11:53:46 -0800117 j += Character.charCount(valueCp);
Chiao Chengecba27e2012-12-27 17:14:53 -0800118 }
Jay Shrauner6ae8b192013-01-09 11:53:46 -0800119 if (numMatch == substringLength) {
120 return i;
121 }
Chiao Chengecba27e2012-12-27 17:14:53 -0800122 }
123 return -1;
124 }
125
126 /**
127 * Find the start of the next token. A token is composed of letters and numbers. Any other
128 * character are considered delimiters.
129 *
130 * @param line The string to search for the next token.
131 * @param startIndex The index to start searching. 0 based indexing.
132 * @return The index for the start of the next token. line.length() if next token not found.
133 */
134 @VisibleForTesting
135 static int findNextTokenStart(String line, int startIndex) {
136 int index = startIndex;
137
138 // If already in token, eat remainder of token.
139 while (index <= line.length()) {
140 if (index == line.length()) {
141 // No more tokens.
142 return index;
143 }
144 final int codePoint = line.codePointAt(index);
145 if (!Character.isLetterOrDigit(codePoint)) {
146 break;
147 }
148 index += Character.charCount(codePoint);
149 }
150
151 // Out of token, eat all consecutive delimiters.
152 while (index <= line.length()) {
153 if (index == line.length()) {
154 return index;
155 }
156 final int codePoint = line.codePointAt(index);
157 if (Character.isLetterOrDigit(codePoint)) {
158 break;
159 }
160 index += Character.charCount(codePoint);
161 }
162
163 return index;
164 }
165
166 /**
167 * Anything other than letter and numbers are considered delimiters. Remove start and end
168 * delimiters since they are not relevant to search.
169 *
170 * @param query The query string to clean.
171 * @return The cleaned query. Empty string if all characters are cleaned out.
172 */
173 public static String cleanStartAndEndOfSearchQuery(String query) {
174 int start = 0;
175 while (start < query.length()) {
176 int codePoint = query.codePointAt(start);
177 if (Character.isLetterOrDigit(codePoint)) {
178 break;
179 }
180 start += Character.charCount(codePoint);
181 }
182
183 if (start == query.length()) {
184 // All characters are delimiters.
185 return "";
186 }
187
188 int end = query.length() - 1;
189 while (end > -1) {
190 if (Character.isLowSurrogate(query.charAt(end))) {
191 // Assume valid i18n string. There should be a matching high surrogate before it.
192 end--;
193 }
194 int codePoint = query.codePointAt(end);
195 if (Character.isLetterOrDigit(codePoint)) {
196 break;
197 }
198 end--;
199 }
200
201 // end is a letter or digit.
202 return query.substring(start, end + 1);
203 }
204}