blob: 9084ea6ece01aa9b4a7f1bca95f2cd7e94707584 [file] [log] [blame]
Daniel Dunbare6551282009-09-16 22:38:48 +00001//===-- StringRef.cpp - Lightweight String References ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/ADT/StringRef.h"
Douglas Gregor441c8b42009-12-30 17:23:44 +000011#include <vector>
Daniel Dunbare6551282009-09-16 22:38:48 +000012using namespace llvm;
13
Daniel Dunbar77696be2009-09-22 03:34:40 +000014// MSVC emits references to this into the translation units which reference it.
15#ifndef _MSC_VER
Daniel Dunbare6551282009-09-16 22:38:48 +000016const size_t StringRef::npos;
Daniel Dunbar77696be2009-09-22 03:34:40 +000017#endif
Chris Lattnercea14382009-09-19 19:47:14 +000018
Benjamin Kramer05872ea2009-11-12 20:36:59 +000019static char ascii_tolower(char x) {
20 if (x >= 'A' && x <= 'Z')
21 return x - 'A' + 'a';
22 return x;
23}
24
25/// compare_lower - Compare strings, ignoring case.
26int StringRef::compare_lower(StringRef RHS) const {
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +000027 for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
Benjamin Kramer05872ea2009-11-12 20:36:59 +000028 char LHC = ascii_tolower(Data[I]);
29 char RHC = ascii_tolower(RHS.Data[I]);
30 if (LHC != RHC)
31 return LHC < RHC ? -1 : 1;
32 }
33
34 if (Length == RHS.Length)
35 return 0;
36 return Length < RHS.Length ? -1 : 1;
37}
38
Douglas Gregor441c8b42009-12-30 17:23:44 +000039/// \brief Compute the edit distance between the two given strings.
40unsigned StringRef::edit_distance(llvm::StringRef Other,
41 bool AllowReplacements) {
42 size_type m = size();
43 size_type n = Other.size();
44
45 std::vector<unsigned> previous(n+1, 0);
46 for (std::vector<unsigned>::size_type i = 0; i <= n; ++i)
47 previous[i] = i;
48
49 std::vector<unsigned> current(n+1, 0);
50 for (size_type y = 1; y <= m; ++y) {
51 current.assign(n+1, 0);
52 current[0] = y;
53 for (size_type x = 1; x <= n; ++x) {
54 if (AllowReplacements) {
55 current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
56 min(current[x-1], previous[x])+1);
57 }
58 else {
59 if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
60 else current[x] = min(current[x-1], previous[x]) + 1;
61 }
62 }
63 current.swap(previous);
64 }
65
66 return previous[n];
67}
68
Chris Lattner05a32c82009-09-20 01:22:16 +000069//===----------------------------------------------------------------------===//
70// String Searching
71//===----------------------------------------------------------------------===//
72
73
74/// find - Search for the first string \arg Str in the string.
75///
76/// \return - The index of the first occurence of \arg Str, or npos if not
77/// found.
Daniel Dunbar64066bd2009-11-11 00:28:53 +000078size_t StringRef::find(StringRef Str, size_t From) const {
Chris Lattner05a32c82009-09-20 01:22:16 +000079 size_t N = Str.size();
80 if (N > Length)
81 return npos;
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +000082 for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
Chris Lattner05a32c82009-09-20 01:22:16 +000083 if (substr(i, N).equals(Str))
84 return i;
85 return npos;
86}
87
88/// rfind - Search for the last string \arg Str in the string.
89///
90/// \return - The index of the last occurence of \arg Str, or npos if not
91/// found.
Daniel Dunbar2928c832009-11-06 10:58:06 +000092size_t StringRef::rfind(StringRef Str) const {
Chris Lattner05a32c82009-09-20 01:22:16 +000093 size_t N = Str.size();
94 if (N > Length)
95 return npos;
96 for (size_t i = Length - N + 1, e = 0; i != e;) {
97 --i;
98 if (substr(i, N).equals(Str))
99 return i;
100 }
101 return npos;
102}
103
Daniel Dunbar64066bd2009-11-11 00:28:53 +0000104/// find_first_of - Find the first character in the string that is in \arg
105/// Chars, or npos if not found.
106///
107/// Note: O(size() * Chars.size())
108StringRef::size_type StringRef::find_first_of(StringRef Chars,
109 size_t From) const {
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +0000110 for (size_type i = min(From, Length), e = Length; i != e; ++i)
Chris Lattner05a32c82009-09-20 01:22:16 +0000111 if (Chars.find(Data[i]) != npos)
112 return i;
113 return npos;
114}
115
116/// find_first_not_of - Find the first character in the string that is not
Daniel Dunbar64066bd2009-11-11 00:28:53 +0000117/// \arg C or npos if not found.
118StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +0000119 for (size_type i = min(From, Length), e = Length; i != e; ++i)
Daniel Dunbar64066bd2009-11-11 00:28:53 +0000120 if (Data[i] != C)
121 return i;
122 return npos;
123}
124
125/// find_first_not_of - Find the first character in the string that is not
126/// in the string \arg Chars, or npos if not found.
127///
128/// Note: O(size() * Chars.size())
129StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
130 size_t From) const {
Daniel Dunbar58ce7ac2009-11-19 18:53:18 +0000131 for (size_type i = min(From, Length), e = Length; i != e; ++i)
Chris Lattner05a32c82009-09-20 01:22:16 +0000132 if (Chars.find(Data[i]) == npos)
133 return i;
134 return npos;
135}
136
137
138//===----------------------------------------------------------------------===//
139// Helpful Algorithms
140//===----------------------------------------------------------------------===//
141
142/// count - Return the number of non-overlapped occurrences of \arg Str in
143/// the string.
Daniel Dunbar2928c832009-11-06 10:58:06 +0000144size_t StringRef::count(StringRef Str) const {
Chris Lattner05a32c82009-09-20 01:22:16 +0000145 size_t Count = 0;
146 size_t N = Str.size();
147 if (N > Length)
148 return 0;
149 for (size_t i = 0, e = Length - N + 1; i != e; ++i)
150 if (substr(i, N).equals(Str))
151 ++Count;
152 return Count;
153}
154
Chris Lattner63c6b7d2009-09-19 23:58:48 +0000155/// GetAsUnsignedInteger - Workhorse method that converts a integer character
156/// sequence of radix up to 36 to an unsigned long long value.
Chris Lattnercea14382009-09-19 19:47:14 +0000157static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
158 unsigned long long &Result) {
159 // Autosense radix if not specified.
160 if (Radix == 0) {
Chris Lattner6441e542009-09-20 22:56:43 +0000161 if (Str.startswith("0x")) {
162 Str = Str.substr(2);
163 Radix = 16;
164 } else if (Str.startswith("0b")) {
165 Str = Str.substr(2);
166 Radix = 2;
167 } else if (Str.startswith("0"))
168 Radix = 8;
169 else
Chris Lattnercea14382009-09-19 19:47:14 +0000170 Radix = 10;
Chris Lattnercea14382009-09-19 19:47:14 +0000171 }
172
173 // Empty strings (after the radix autosense) are invalid.
174 if (Str.empty()) return true;
175
176 // Parse all the bytes of the string given this radix. Watch for overflow.
177 Result = 0;
178 while (!Str.empty()) {
179 unsigned CharVal;
180 if (Str[0] >= '0' && Str[0] <= '9')
181 CharVal = Str[0]-'0';
182 else if (Str[0] >= 'a' && Str[0] <= 'z')
183 CharVal = Str[0]-'a'+10;
184 else if (Str[0] >= 'A' && Str[0] <= 'Z')
185 CharVal = Str[0]-'A'+10;
186 else
187 return true;
188
189 // If the parsed value is larger than the integer radix, the string is
190 // invalid.
191 if (CharVal >= Radix)
192 return true;
193
194 // Add in this character.
195 unsigned long long PrevResult = Result;
196 Result = Result*Radix+CharVal;
197
198 // Check for overflow.
199 if (Result < PrevResult)
200 return true;
201
202 Str = Str.substr(1);
203 }
204
205 return false;
206}
207
208bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
209 return GetAsUnsignedInteger(*this, Radix, Result);
210}
211
Chris Lattner63c6b7d2009-09-19 23:58:48 +0000212
213bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
214 unsigned long long ULLVal;
215
216 // Handle positive strings first.
217 if (empty() || front() != '-') {
218 if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
219 // Check for value so large it overflows a signed value.
220 (long long)ULLVal < 0)
221 return true;
222 Result = ULLVal;
223 return false;
224 }
225
226 // Get the positive part of the value.
227 if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
228 // Reject values so large they'd overflow as negative signed, but allow
229 // "-0". This negates the unsigned so that the negative isn't undefined
230 // on signed overflow.
231 (long long)-ULLVal > 0)
232 return true;
233
234 Result = -ULLVal;
235 return false;
236}
237
238bool StringRef::getAsInteger(unsigned Radix, int &Result) const {
239 long long Val;
240 if (getAsInteger(Radix, Val) ||
241 (int)Val != Val)
242 return true;
243 Result = Val;
244 return false;
245}
246
247bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
248 unsigned long long Val;
249 if (getAsInteger(Radix, Val) ||
250 (unsigned)Val != Val)
251 return true;
252 Result = Val;
253 return false;
254}