blob: 2b262dcd3fa71445425f61672760f65d07dc4e6b [file] [log] [blame]
Daniel Dunbare6551282009-09-16 22:38:48 +00001//===-- StringRef.cpp - Lightweight String References ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/ADT/StringRef.h"
John McCall1e7ad392010-02-28 09:55:58 +000011#include "llvm/ADT/APInt.h"
Douglas Gregorad6b6da2010-01-07 00:51:54 +000012
Daniel Dunbare6551282009-09-16 22:38:48 +000013using namespace llvm;
14
Daniel Dunbar77696be2009-09-22 03:34:40 +000015// MSVC emits references to this into the translation units which reference it.
16#ifndef _MSC_VER
Daniel Dunbare6551282009-09-16 22:38:48 +000017const size_t StringRef::npos;
Daniel Dunbar77696be2009-09-22 03:34:40 +000018#endif
Chris Lattnercea14382009-09-19 19:47:14 +000019
Benjamin Kramer05872ea2009-11-12 20:36:59 +000020static char ascii_tolower(char x) {
21 if (x >= 'A' && x <= 'Z')
22 return x - 'A' + 'a';
23 return x;
24}
25
26/// compare_lower - Compare strings, ignoring case.
27int StringRef::compare_lower(StringRef RHS) const {
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +000028 for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
Benjamin Kramer05872ea2009-11-12 20:36:59 +000029 char LHC = ascii_tolower(Data[I]);
30 char RHC = ascii_tolower(RHS.Data[I]);
31 if (LHC != RHC)
32 return LHC < RHC ? -1 : 1;
33 }
34
35 if (Length == RHS.Length)
36 return 0;
37 return Length < RHS.Length ? -1 : 1;
38}
39
Douglas Gregor7e54d5b2009-12-31 04:24:34 +000040// Compute the edit distance between the two given strings.
Douglas Gregor441c8b42009-12-30 17:23:44 +000041unsigned StringRef::edit_distance(llvm::StringRef Other,
42 bool AllowReplacements) {
Douglas Gregor7e54d5b2009-12-31 04:24:34 +000043 // The algorithm implemented below is the "classic"
44 // dynamic-programming algorithm for computing the Levenshtein
45 // distance, which is described here:
46 //
47 // http://en.wikipedia.org/wiki/Levenshtein_distance
48 //
49 // Although the algorithm is typically described using an m x n
50 // array, only two rows are used at a time, so this implemenation
51 // just keeps two separate vectors for those two rows.
Douglas Gregor441c8b42009-12-30 17:23:44 +000052 size_type m = size();
53 size_type n = Other.size();
54
Douglas Gregor2772ea82010-01-07 02:24:06 +000055 const unsigned SmallBufferSize = 64;
56 unsigned SmallBuffer[SmallBufferSize];
57 unsigned *Allocated = 0;
58 unsigned *previous = SmallBuffer;
59 if (2*(n + 1) > SmallBufferSize)
60 Allocated = previous = new unsigned [2*(n+1)];
61 unsigned *current = previous + (n + 1);
Douglas Gregorad6b6da2010-01-07 00:51:54 +000062
63 for (unsigned i = 0; i <= n; ++i)
Douglas Gregor441c8b42009-12-30 17:23:44 +000064 previous[i] = i;
65
Douglas Gregor441c8b42009-12-30 17:23:44 +000066 for (size_type y = 1; y <= m; ++y) {
Douglas Gregor441c8b42009-12-30 17:23:44 +000067 current[0] = y;
68 for (size_type x = 1; x <= n; ++x) {
69 if (AllowReplacements) {
70 current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
71 min(current[x-1], previous[x])+1);
72 }
73 else {
74 if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
75 else current[x] = min(current[x-1], previous[x]) + 1;
76 }
77 }
Douglas Gregorad6b6da2010-01-07 00:51:54 +000078
79 unsigned *tmp = current;
80 current = previous;
81 previous = tmp;
Douglas Gregor441c8b42009-12-30 17:23:44 +000082 }
83
Douglas Gregorad6b6da2010-01-07 00:51:54 +000084 unsigned Result = previous[n];
Douglas Gregor2772ea82010-01-07 02:24:06 +000085 delete [] Allocated;
Douglas Gregorad6b6da2010-01-07 00:51:54 +000086
87 return Result;
Douglas Gregor441c8b42009-12-30 17:23:44 +000088}
89
Chris Lattner05a32c82009-09-20 01:22:16 +000090//===----------------------------------------------------------------------===//
91// String Searching
92//===----------------------------------------------------------------------===//
93
94
95/// find - Search for the first string \arg Str in the string.
96///
97/// \return - The index of the first occurence of \arg Str, or npos if not
98/// found.
Daniel Dunbar64066bd2009-11-11 00:28:53 +000099size_t StringRef::find(StringRef Str, size_t From) const {
Chris Lattner05a32c82009-09-20 01:22:16 +0000100 size_t N = Str.size();
101 if (N > Length)
102 return npos;
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +0000103 for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
Chris Lattner05a32c82009-09-20 01:22:16 +0000104 if (substr(i, N).equals(Str))
105 return i;
106 return npos;
107}
108
109/// rfind - Search for the last string \arg Str in the string.
110///
111/// \return - The index of the last occurence of \arg Str, or npos if not
112/// found.
Daniel Dunbar2928c832009-11-06 10:58:06 +0000113size_t StringRef::rfind(StringRef Str) const {
Chris Lattner05a32c82009-09-20 01:22:16 +0000114 size_t N = Str.size();
115 if (N > Length)
116 return npos;
117 for (size_t i = Length - N + 1, e = 0; i != e;) {
118 --i;
119 if (substr(i, N).equals(Str))
120 return i;
121 }
122 return npos;
123}
124
Daniel Dunbar64066bd2009-11-11 00:28:53 +0000125/// find_first_of - Find the first character in the string that is in \arg
126/// Chars, or npos if not found.
127///
128/// Note: O(size() * Chars.size())
129StringRef::size_type StringRef::find_first_of(StringRef Chars,
130 size_t From) const {
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +0000131 for (size_type i = min(From, Length), e = Length; i != e; ++i)
Chris Lattner05a32c82009-09-20 01:22:16 +0000132 if (Chars.find(Data[i]) != npos)
133 return i;
134 return npos;
135}
136
137/// find_first_not_of - Find the first character in the string that is not
Daniel Dunbar64066bd2009-11-11 00:28:53 +0000138/// \arg C or npos if not found.
139StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +0000140 for (size_type i = min(From, Length), e = Length; i != e; ++i)
Daniel Dunbar64066bd2009-11-11 00:28:53 +0000141 if (Data[i] != C)
142 return i;
143 return npos;
144}
145
146/// find_first_not_of - Find the first character in the string that is not
147/// in the string \arg Chars, or npos if not found.
148///
149/// Note: O(size() * Chars.size())
150StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
151 size_t From) const {
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +0000152 for (size_type i = min(From, Length), e = Length; i != e; ++i)
Chris Lattner05a32c82009-09-20 01:22:16 +0000153 if (Chars.find(Data[i]) == npos)
154 return i;
155 return npos;
156}
157
158
159//===----------------------------------------------------------------------===//
160// Helpful Algorithms
161//===----------------------------------------------------------------------===//
162
163/// count - Return the number of non-overlapped occurrences of \arg Str in
164/// the string.
Daniel Dunbar2928c832009-11-06 10:58:06 +0000165size_t StringRef::count(StringRef Str) const {
Chris Lattner05a32c82009-09-20 01:22:16 +0000166 size_t Count = 0;
167 size_t N = Str.size();
168 if (N > Length)
169 return 0;
170 for (size_t i = 0, e = Length - N + 1; i != e; ++i)
171 if (substr(i, N).equals(Str))
172 ++Count;
173 return Count;
174}
175
John McCall1e7ad392010-02-28 09:55:58 +0000176static unsigned GetAutoSenseRadix(StringRef &Str) {
177 if (Str.startswith("0x")) {
178 Str = Str.substr(2);
179 return 16;
180 } else if (Str.startswith("0b")) {
181 Str = Str.substr(2);
182 return 2;
183 } else if (Str.startswith("0")) {
184 return 8;
185 } else {
186 return 10;
187 }
188}
189
190
Chris Lattner63c6b7d2009-09-19 23:58:48 +0000191/// GetAsUnsignedInteger - Workhorse method that converts a integer character
192/// sequence of radix up to 36 to an unsigned long long value.
Chris Lattnercea14382009-09-19 19:47:14 +0000193static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
194 unsigned long long &Result) {
195 // Autosense radix if not specified.
John McCall1e7ad392010-02-28 09:55:58 +0000196 if (Radix == 0)
197 Radix = GetAutoSenseRadix(Str);
Chris Lattnercea14382009-09-19 19:47:14 +0000198
199 // Empty strings (after the radix autosense) are invalid.
200 if (Str.empty()) return true;
201
202 // Parse all the bytes of the string given this radix. Watch for overflow.
203 Result = 0;
204 while (!Str.empty()) {
205 unsigned CharVal;
206 if (Str[0] >= '0' && Str[0] <= '9')
207 CharVal = Str[0]-'0';
208 else if (Str[0] >= 'a' && Str[0] <= 'z')
209 CharVal = Str[0]-'a'+10;
210 else if (Str[0] >= 'A' && Str[0] <= 'Z')
211 CharVal = Str[0]-'A'+10;
212 else
213 return true;
214
215 // If the parsed value is larger than the integer radix, the string is
216 // invalid.
217 if (CharVal >= Radix)
218 return true;
219
220 // Add in this character.
221 unsigned long long PrevResult = Result;
222 Result = Result*Radix+CharVal;
223
224 // Check for overflow.
225 if (Result < PrevResult)
226 return true;
227
228 Str = Str.substr(1);
229 }
230
231 return false;
232}
233
234bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
235 return GetAsUnsignedInteger(*this, Radix, Result);
236}
237
Chris Lattner63c6b7d2009-09-19 23:58:48 +0000238
239bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
240 unsigned long long ULLVal;
241
242 // Handle positive strings first.
243 if (empty() || front() != '-') {
244 if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
245 // Check for value so large it overflows a signed value.
246 (long long)ULLVal < 0)
247 return true;
248 Result = ULLVal;
249 return false;
250 }
251
252 // Get the positive part of the value.
253 if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
254 // Reject values so large they'd overflow as negative signed, but allow
255 // "-0". This negates the unsigned so that the negative isn't undefined
256 // on signed overflow.
257 (long long)-ULLVal > 0)
258 return true;
259
260 Result = -ULLVal;
261 return false;
262}
263
264bool StringRef::getAsInteger(unsigned Radix, int &Result) const {
265 long long Val;
266 if (getAsInteger(Radix, Val) ||
267 (int)Val != Val)
268 return true;
269 Result = Val;
270 return false;
271}
272
273bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
274 unsigned long long Val;
275 if (getAsInteger(Radix, Val) ||
276 (unsigned)Val != Val)
277 return true;
278 Result = Val;
279 return false;
280}
John McCall1e7ad392010-02-28 09:55:58 +0000281
282bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
283 StringRef Str = *this;
284
285 // Autosense radix if not specified.
286 if (Radix == 0)
287 Radix = GetAutoSenseRadix(Str);
288
289 assert(Radix > 1 && Radix <= 36);
290
291 // Empty strings (after the radix autosense) are invalid.
292 if (Str.empty()) return true;
293
294 // Skip leading zeroes. This can be a significant improvement if
295 // it means we don't need > 64 bits.
296 while (!Str.empty() && Str.front() == '0')
297 Str = Str.substr(1);
298
299 // If it was nothing but zeroes....
300 if (Str.empty()) {
301 Result = APInt(64, 0);
302 return false;
303 }
304
305 // (Over-)estimate the required number of bits.
306 unsigned Log2Radix = 0;
307 while ((1U << Log2Radix) < Radix) Log2Radix++;
308 bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
309
310 unsigned BitWidth = Log2Radix * Str.size();
311 if (BitWidth < Result.getBitWidth())
312 BitWidth = Result.getBitWidth(); // don't shrink the result
313 else
314 Result.zext(BitWidth);
315
316 APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
317 if (!IsPowerOf2Radix) {
318 // These must have the same bit-width as Result.
319 RadixAP = APInt(BitWidth, Radix);
320 CharAP = APInt(BitWidth, 0);
321 }
322
323 // Parse all the bytes of the string given this radix.
324 Result = 0;
325 while (!Str.empty()) {
326 unsigned CharVal;
327 if (Str[0] >= '0' && Str[0] <= '9')
328 CharVal = Str[0]-'0';
329 else if (Str[0] >= 'a' && Str[0] <= 'z')
330 CharVal = Str[0]-'a'+10;
331 else if (Str[0] >= 'A' && Str[0] <= 'Z')
332 CharVal = Str[0]-'A'+10;
333 else
334 return true;
335
336 // If the parsed value is larger than the integer radix, the string is
337 // invalid.
338 if (CharVal >= Radix)
339 return true;
340
341 // Add in this character.
342 if (IsPowerOf2Radix) {
343 Result <<= Log2Radix;
344 Result |= CharVal;
345 } else {
346 Result *= RadixAP;
347 CharAP = CharVal;
348 Result += CharAP;
349 }
350
351 Str = Str.substr(1);
352 }
353
354 return false;
355}