blob: e4a9984828f3edc74e88270c223395fd34f5cb35 [file] [log] [blame]
Daniel Dunbare6551282009-09-16 22:38:48 +00001//===-- StringRef.cpp - Lightweight String References ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/ADT/StringRef.h"
Douglas Gregor7e54d5b2009-12-31 04:24:34 +000011#include "llvm/ADT/SmallVector.h"
Daniel Dunbare6551282009-09-16 22:38:48 +000012using namespace llvm;
13
Daniel Dunbar77696be2009-09-22 03:34:40 +000014// MSVC emits references to this into the translation units which reference it.
15#ifndef _MSC_VER
Daniel Dunbare6551282009-09-16 22:38:48 +000016const size_t StringRef::npos;
Daniel Dunbar77696be2009-09-22 03:34:40 +000017#endif
Chris Lattnercea14382009-09-19 19:47:14 +000018
Benjamin Kramer05872ea2009-11-12 20:36:59 +000019static char ascii_tolower(char x) {
20 if (x >= 'A' && x <= 'Z')
21 return x - 'A' + 'a';
22 return x;
23}
24
25/// compare_lower - Compare strings, ignoring case.
26int StringRef::compare_lower(StringRef RHS) const {
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +000027 for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
Benjamin Kramer05872ea2009-11-12 20:36:59 +000028 char LHC = ascii_tolower(Data[I]);
29 char RHC = ascii_tolower(RHS.Data[I]);
30 if (LHC != RHC)
31 return LHC < RHC ? -1 : 1;
32 }
33
34 if (Length == RHS.Length)
35 return 0;
36 return Length < RHS.Length ? -1 : 1;
37}
38
Douglas Gregor7e54d5b2009-12-31 04:24:34 +000039// Compute the edit distance between the two given strings.
Douglas Gregor441c8b42009-12-30 17:23:44 +000040unsigned StringRef::edit_distance(llvm::StringRef Other,
41 bool AllowReplacements) {
Douglas Gregor7e54d5b2009-12-31 04:24:34 +000042 // The algorithm implemented below is the "classic"
43 // dynamic-programming algorithm for computing the Levenshtein
44 // distance, which is described here:
45 //
46 // http://en.wikipedia.org/wiki/Levenshtein_distance
47 //
48 // Although the algorithm is typically described using an m x n
49 // array, only two rows are used at a time, so this implemenation
50 // just keeps two separate vectors for those two rows.
Douglas Gregor441c8b42009-12-30 17:23:44 +000051 size_type m = size();
52 size_type n = Other.size();
53
Douglas Gregor7e54d5b2009-12-31 04:24:34 +000054 SmallVector<unsigned, 32> previous(n+1, 0);
55 for (SmallVector<unsigned, 32>::size_type i = 0; i <= n; ++i)
Douglas Gregor441c8b42009-12-30 17:23:44 +000056 previous[i] = i;
57
Douglas Gregor7e54d5b2009-12-31 04:24:34 +000058 SmallVector<unsigned, 32> current(n+1, 0);
Douglas Gregor441c8b42009-12-30 17:23:44 +000059 for (size_type y = 1; y <= m; ++y) {
60 current.assign(n+1, 0);
61 current[0] = y;
62 for (size_type x = 1; x <= n; ++x) {
63 if (AllowReplacements) {
64 current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
65 min(current[x-1], previous[x])+1);
66 }
67 else {
68 if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
69 else current[x] = min(current[x-1], previous[x]) + 1;
70 }
71 }
72 current.swap(previous);
73 }
74
75 return previous[n];
76}
77
Chris Lattner05a32c82009-09-20 01:22:16 +000078//===----------------------------------------------------------------------===//
79// String Searching
80//===----------------------------------------------------------------------===//
81
82
83/// find - Search for the first string \arg Str in the string.
84///
85/// \return - The index of the first occurence of \arg Str, or npos if not
86/// found.
Daniel Dunbar64066bd2009-11-11 00:28:53 +000087size_t StringRef::find(StringRef Str, size_t From) const {
Chris Lattner05a32c82009-09-20 01:22:16 +000088 size_t N = Str.size();
89 if (N > Length)
90 return npos;
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +000091 for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
Chris Lattner05a32c82009-09-20 01:22:16 +000092 if (substr(i, N).equals(Str))
93 return i;
94 return npos;
95}
96
97/// rfind - Search for the last string \arg Str in the string.
98///
99/// \return - The index of the last occurence of \arg Str, or npos if not
100/// found.
Daniel Dunbar2928c832009-11-06 10:58:06 +0000101size_t StringRef::rfind(StringRef Str) const {
Chris Lattner05a32c82009-09-20 01:22:16 +0000102 size_t N = Str.size();
103 if (N > Length)
104 return npos;
105 for (size_t i = Length - N + 1, e = 0; i != e;) {
106 --i;
107 if (substr(i, N).equals(Str))
108 return i;
109 }
110 return npos;
111}
112
Daniel Dunbar64066bd2009-11-11 00:28:53 +0000113/// find_first_of - Find the first character in the string that is in \arg
114/// Chars, or npos if not found.
115///
116/// Note: O(size() * Chars.size())
117StringRef::size_type StringRef::find_first_of(StringRef Chars,
118 size_t From) const {
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +0000119 for (size_type i = min(From, Length), e = Length; i != e; ++i)
Chris Lattner05a32c82009-09-20 01:22:16 +0000120 if (Chars.find(Data[i]) != npos)
121 return i;
122 return npos;
123}
124
125/// find_first_not_of - Find the first character in the string that is not
Daniel Dunbar64066bd2009-11-11 00:28:53 +0000126/// \arg C or npos if not found.
127StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +0000128 for (size_type i = min(From, Length), e = Length; i != e; ++i)
Daniel Dunbar64066bd2009-11-11 00:28:53 +0000129 if (Data[i] != C)
130 return i;
131 return npos;
132}
133
134/// find_first_not_of - Find the first character in the string that is not
135/// in the string \arg Chars, or npos if not found.
136///
137/// Note: O(size() * Chars.size())
138StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
139 size_t From) const {
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +0000140 for (size_type i = min(From, Length), e = Length; i != e; ++i)
Chris Lattner05a32c82009-09-20 01:22:16 +0000141 if (Chars.find(Data[i]) == npos)
142 return i;
143 return npos;
144}
145
146
147//===----------------------------------------------------------------------===//
148// Helpful Algorithms
149//===----------------------------------------------------------------------===//
150
151/// count - Return the number of non-overlapped occurrences of \arg Str in
152/// the string.
Daniel Dunbar2928c832009-11-06 10:58:06 +0000153size_t StringRef::count(StringRef Str) const {
Chris Lattner05a32c82009-09-20 01:22:16 +0000154 size_t Count = 0;
155 size_t N = Str.size();
156 if (N > Length)
157 return 0;
158 for (size_t i = 0, e = Length - N + 1; i != e; ++i)
159 if (substr(i, N).equals(Str))
160 ++Count;
161 return Count;
162}
163
Chris Lattner63c6b7d2009-09-19 23:58:48 +0000164/// GetAsUnsignedInteger - Workhorse method that converts a integer character
165/// sequence of radix up to 36 to an unsigned long long value.
Chris Lattnercea14382009-09-19 19:47:14 +0000166static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
167 unsigned long long &Result) {
168 // Autosense radix if not specified.
169 if (Radix == 0) {
Chris Lattner6441e542009-09-20 22:56:43 +0000170 if (Str.startswith("0x")) {
171 Str = Str.substr(2);
172 Radix = 16;
173 } else if (Str.startswith("0b")) {
174 Str = Str.substr(2);
175 Radix = 2;
176 } else if (Str.startswith("0"))
177 Radix = 8;
178 else
Chris Lattnercea14382009-09-19 19:47:14 +0000179 Radix = 10;
Chris Lattnercea14382009-09-19 19:47:14 +0000180 }
181
182 // Empty strings (after the radix autosense) are invalid.
183 if (Str.empty()) return true;
184
185 // Parse all the bytes of the string given this radix. Watch for overflow.
186 Result = 0;
187 while (!Str.empty()) {
188 unsigned CharVal;
189 if (Str[0] >= '0' && Str[0] <= '9')
190 CharVal = Str[0]-'0';
191 else if (Str[0] >= 'a' && Str[0] <= 'z')
192 CharVal = Str[0]-'a'+10;
193 else if (Str[0] >= 'A' && Str[0] <= 'Z')
194 CharVal = Str[0]-'A'+10;
195 else
196 return true;
197
198 // If the parsed value is larger than the integer radix, the string is
199 // invalid.
200 if (CharVal >= Radix)
201 return true;
202
203 // Add in this character.
204 unsigned long long PrevResult = Result;
205 Result = Result*Radix+CharVal;
206
207 // Check for overflow.
208 if (Result < PrevResult)
209 return true;
210
211 Str = Str.substr(1);
212 }
213
214 return false;
215}
216
217bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
218 return GetAsUnsignedInteger(*this, Radix, Result);
219}
220
Chris Lattner63c6b7d2009-09-19 23:58:48 +0000221
222bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
223 unsigned long long ULLVal;
224
225 // Handle positive strings first.
226 if (empty() || front() != '-') {
227 if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
228 // Check for value so large it overflows a signed value.
229 (long long)ULLVal < 0)
230 return true;
231 Result = ULLVal;
232 return false;
233 }
234
235 // Get the positive part of the value.
236 if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
237 // Reject values so large they'd overflow as negative signed, but allow
238 // "-0". This negates the unsigned so that the negative isn't undefined
239 // on signed overflow.
240 (long long)-ULLVal > 0)
241 return true;
242
243 Result = -ULLVal;
244 return false;
245}
246
247bool StringRef::getAsInteger(unsigned Radix, int &Result) const {
248 long long Val;
249 if (getAsInteger(Radix, Val) ||
250 (int)Val != Val)
251 return true;
252 Result = Val;
253 return false;
254}
255
256bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
257 unsigned long long Val;
258 if (getAsInteger(Radix, Val) ||
259 (unsigned)Val != Val)
260 return true;
261 Result = Val;
262 return false;
263}