blob: 9b7cc1c1d182cfeddd9e98bd476cefb886a13778 [file] [log] [blame]
Daniel Dunbar44981682009-09-16 22:38:48 +00001//===-- StringRef.cpp - Lightweight String References ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/ADT/StringRef.h"
Zachary Turner8bd42a12017-02-14 19:06:37 +000011#include "llvm/ADT/APFloat.h"
John McCall512b6502010-02-28 09:55:58 +000012#include "llvm/ADT/APInt.h"
Chandler Carruthca99ad32012-03-04 10:55:27 +000013#include "llvm/ADT/Hashing.h"
Kaelyn Uhrain7a9ccf42012-02-15 22:13:07 +000014#include "llvm/ADT/edit_distance.h"
Benjamin Kramer08fd2cf2010-08-23 18:16:08 +000015#include <bitset>
Douglas Gregor09470e62010-01-07 00:51:54 +000016
Daniel Dunbar44981682009-09-16 22:38:48 +000017using namespace llvm;
18
Daniel Dunbarc827d9e2009-09-22 03:34:40 +000019// MSVC emits references to this into the translation units which reference it.
20#ifndef _MSC_VER
Daniel Dunbar44981682009-09-16 22:38:48 +000021const size_t StringRef::npos;
Daniel Dunbarc827d9e2009-09-22 03:34:40 +000022#endif
Chris Lattner68ee7002009-09-19 19:47:14 +000023
/// Map an ASCII upper-case letter ('A'..'Z') to its lower-case counterpart.
/// Every other character is returned unchanged.
static char ascii_tolower(char x) {
  const bool IsUpper = 'A' <= x && x <= 'Z';
  return IsUpper ? x - 'A' + 'a' : x;
}
29
/// Map an ASCII lower-case letter ('a'..'z') to its upper-case counterpart.
/// Every other character is returned unchanged.
static char ascii_toupper(char x) {
  // Guard: anything outside 'a'..'z' passes through untouched.
  if (x < 'a' || x > 'z')
    return x;
  return x - 'a' + 'A';
}
35
/// Return true if \p x is one of the ASCII decimal digits '0'..'9'.
static bool ascii_isdigit(char x) {
  if (x < '0')
    return false;
  return x <= '9';
}
39
Rui Ueyama00e24e42013-10-30 18:32:26 +000040// strncasecmp() is not available on non-POSIX systems, so define an
41// alternative function here.
42static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
43 for (size_t I = 0; I < Length; ++I) {
44 unsigned char LHC = ascii_tolower(LHS[I]);
45 unsigned char RHC = ascii_tolower(RHS[I]);
Benjamin Kramer68e49452009-11-12 20:36:59 +000046 if (LHC != RHC)
47 return LHC < RHC ? -1 : 1;
48 }
Rui Ueyama00e24e42013-10-30 18:32:26 +000049 return 0;
50}
Benjamin Kramer68e49452009-11-12 20:36:59 +000051
Rui Ueyama00e24e42013-10-30 18:32:26 +000052/// compare_lower - Compare strings, ignoring case.
53int StringRef::compare_lower(StringRef RHS) const {
Craig Topper3ced27c2014-08-21 04:31:10 +000054 if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
Rui Ueyama00e24e42013-10-30 18:32:26 +000055 return Res;
Benjamin Kramer68e49452009-11-12 20:36:59 +000056 if (Length == RHS.Length)
Benjamin Kramerb04d4af2010-08-26 14:21:08 +000057 return 0;
Benjamin Kramer68e49452009-11-12 20:36:59 +000058 return Length < RHS.Length ? -1 : 1;
59}
60
Rui Ueyama00e24e42013-10-30 18:32:26 +000061/// Check if this string starts with the given \p Prefix, ignoring case.
62bool StringRef::startswith_lower(StringRef Prefix) const {
63 return Length >= Prefix.Length &&
64 ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
65}
66
67/// Check if this string ends with the given \p Suffix, ignoring case.
68bool StringRef::endswith_lower(StringRef Suffix) const {
69 return Length >= Suffix.Length &&
70 ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
71}
72
Zachary Turner17412b02016-11-12 17:17:12 +000073size_t StringRef::find_lower(char C, size_t From) const {
74 char L = ascii_tolower(C);
75 return find_if([L](char D) { return ascii_tolower(D) == L; }, From);
76}
77
Jakob Stoklund Olesend1d7ed62010-05-26 21:47:28 +000078/// compare_numeric - Compare strings, handle embedded numbers.
79int StringRef::compare_numeric(StringRef RHS) const {
Craig Topper3ced27c2014-08-21 04:31:10 +000080 for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
Jakob Stoklund Olesenc874e2d2011-09-30 17:03:55 +000081 // Check for sequences of digits.
Jakob Stoklund Olesend1d7ed62010-05-26 21:47:28 +000082 if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
Jakob Stoklund Olesenc874e2d2011-09-30 17:03:55 +000083 // The longer sequence of numbers is considered larger.
84 // This doesn't really handle prefixed zeros well.
85 size_t J;
86 for (J = I + 1; J != E + 1; ++J) {
Jakob Stoklund Olesend1d7ed62010-05-26 21:47:28 +000087 bool ld = J < Length && ascii_isdigit(Data[J]);
88 bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
89 if (ld != rd)
90 return rd ? -1 : 1;
91 if (!rd)
92 break;
93 }
Jakob Stoklund Olesenc874e2d2011-09-30 17:03:55 +000094 // The two number sequences have the same length (J-I), just memcmp them.
95 if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
96 return Res < 0 ? -1 : 1;
97 // Identical number sequences, continue search after the numbers.
98 I = J - 1;
99 continue;
Jakob Stoklund Olesend1d7ed62010-05-26 21:47:28 +0000100 }
Jakob Stoklund Olesenc874e2d2011-09-30 17:03:55 +0000101 if (Data[I] != RHS.Data[I])
102 return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
Jakob Stoklund Olesend1d7ed62010-05-26 21:47:28 +0000103 }
104 if (Length == RHS.Length)
Benjamin Kramerb04d4af2010-08-26 14:21:08 +0000105 return 0;
Jakob Stoklund Olesend1d7ed62010-05-26 21:47:28 +0000106 return Length < RHS.Length ? -1 : 1;
107}
108
Douglas Gregor5639af42009-12-31 04:24:34 +0000109// Compute the edit distance between the two given strings.
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000110unsigned StringRef::edit_distance(llvm::StringRef Other,
Douglas Gregor21afc3b2010-10-19 22:13:48 +0000111 bool AllowReplacements,
Dmitri Gribenko292c9202013-08-24 01:50:41 +0000112 unsigned MaxEditDistance) const {
Kaelyn Uhrain7a9ccf42012-02-15 22:13:07 +0000113 return llvm::ComputeEditDistance(
Craig Toppere1d12942014-08-27 05:25:25 +0000114 makeArrayRef(data(), size()),
115 makeArrayRef(Other.data(), Other.size()),
Kaelyn Uhrain7a9ccf42012-02-15 22:13:07 +0000116 AllowReplacements, MaxEditDistance);
Douglas Gregor165882c2009-12-30 17:23:44 +0000117}
118
Chris Lattner372a8ae2009-09-20 01:22:16 +0000119//===----------------------------------------------------------------------===//
Daniel Dunbar3fa528d2011-11-06 18:04:43 +0000120// String Operations
121//===----------------------------------------------------------------------===//
122
123std::string StringRef::lower() const {
124 std::string Result(size(), char());
125 for (size_type i = 0, e = size(); i != e; ++i) {
126 Result[i] = ascii_tolower(Data[i]);
127 }
128 return Result;
129}
130
131std::string StringRef::upper() const {
132 std::string Result(size(), char());
133 for (size_type i = 0, e = size(); i != e; ++i) {
Benjamin Kramere3b94d12011-11-06 20:36:50 +0000134 Result[i] = ascii_toupper(Data[i]);
Daniel Dunbar3fa528d2011-11-06 18:04:43 +0000135 }
136 return Result;
137}
138
139//===----------------------------------------------------------------------===//
Chris Lattner372a8ae2009-09-20 01:22:16 +0000140// String Searching
141//===----------------------------------------------------------------------===//
142
143
144/// find - Search for the first string \arg Str in the string.
145///
Chris Lattner0ab5e2c2011-04-15 05:18:47 +0000146/// \return - The index of the first occurrence of \arg Str, or npos if not
Chris Lattner372a8ae2009-09-20 01:22:16 +0000147/// found.
Daniel Dunbar9806e4a2009-11-11 00:28:53 +0000148size_t StringRef::find(StringRef Str, size_t From) const {
Chandler Carruth233edd22015-09-10 11:17:49 +0000149 if (From > Length)
Chris Lattner372a8ae2009-09-20 01:22:16 +0000150 return npos;
Benjamin Kramer4d681d72011-10-15 10:08:31 +0000151
Chandler Carruthecbe6192016-12-11 07:46:21 +0000152 const char *Start = Data + From;
153 size_t Size = Length - From;
154
Chandler Carruth233edd22015-09-10 11:17:49 +0000155 const char *Needle = Str.data();
156 size_t N = Str.size();
157 if (N == 0)
158 return From;
Chandler Carruth233edd22015-09-10 11:17:49 +0000159 if (Size < N)
160 return npos;
Chandler Carruthecbe6192016-12-11 07:46:21 +0000161 if (N == 1) {
162 const char *Ptr = (const char *)::memchr(Start, Needle[0], Size);
163 return Ptr == nullptr ? npos : Ptr - Data;
164 }
Chandler Carruth233edd22015-09-10 11:17:49 +0000165
Chandler Carruth233edd22015-09-10 11:17:49 +0000166 const char *Stop = Start + (Size - N + 1);
167
Benjamin Kramer4d681d72011-10-15 10:08:31 +0000168 // For short haystacks or unsupported needles fall back to the naive algorithm
Chandler Carruth233edd22015-09-10 11:17:49 +0000169 if (Size < 16 || N > 255) {
170 do {
171 if (std::memcmp(Start, Needle, N) == 0)
172 return Start - Data;
173 ++Start;
174 } while (Start < Stop);
Benjamin Kramer4d681d72011-10-15 10:08:31 +0000175 return npos;
176 }
177
178 // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
179 uint8_t BadCharSkip[256];
180 std::memset(BadCharSkip, N, 256);
181 for (unsigned i = 0; i != N-1; ++i)
182 BadCharSkip[(uint8_t)Str[i]] = N-1-i;
183
Chandler Carruth233edd22015-09-10 11:17:49 +0000184 do {
Chandler Carruthecbe6192016-12-11 07:46:21 +0000185 uint8_t Last = Start[N - 1];
186 if (LLVM_UNLIKELY(Last == (uint8_t)Needle[N - 1]))
187 if (std::memcmp(Start, Needle, N - 1) == 0)
188 return Start - Data;
Benjamin Kramer4d681d72011-10-15 10:08:31 +0000189
190 // Otherwise skip the appropriate number of bytes.
Chandler Carruthecbe6192016-12-11 07:46:21 +0000191 Start += BadCharSkip[Last];
Chandler Carruth233edd22015-09-10 11:17:49 +0000192 } while (Start < Stop);
Benjamin Kramer4d681d72011-10-15 10:08:31 +0000193
Chris Lattner372a8ae2009-09-20 01:22:16 +0000194 return npos;
195}
196
Zachary Turner17412b02016-11-12 17:17:12 +0000197size_t StringRef::find_lower(StringRef Str, size_t From) const {
198 StringRef This = substr(From);
199 while (This.size() >= Str.size()) {
200 if (This.startswith_lower(Str))
201 return From;
202 This = This.drop_front();
203 ++From;
204 }
205 return npos;
206}
207
208size_t StringRef::rfind_lower(char C, size_t From) const {
209 From = std::min(From, Length);
210 size_t i = From;
211 while (i != 0) {
212 --i;
213 if (ascii_tolower(Data[i]) == ascii_tolower(C))
214 return i;
215 }
216 return npos;
217}
218
Chris Lattner372a8ae2009-09-20 01:22:16 +0000219/// rfind - Search for the last string \arg Str in the string.
220///
Chris Lattner0ab5e2c2011-04-15 05:18:47 +0000221/// \return - The index of the last occurrence of \arg Str, or npos if not
Chris Lattner372a8ae2009-09-20 01:22:16 +0000222/// found.
Daniel Dunbarad36e8a2009-11-06 10:58:06 +0000223size_t StringRef::rfind(StringRef Str) const {
Chris Lattner372a8ae2009-09-20 01:22:16 +0000224 size_t N = Str.size();
225 if (N > Length)
226 return npos;
227 for (size_t i = Length - N + 1, e = 0; i != e;) {
228 --i;
229 if (substr(i, N).equals(Str))
230 return i;
231 }
232 return npos;
233}
234
Zachary Turner17412b02016-11-12 17:17:12 +0000235size_t StringRef::rfind_lower(StringRef Str) const {
236 size_t N = Str.size();
237 if (N > Length)
238 return npos;
239 for (size_t i = Length - N + 1, e = 0; i != e;) {
240 --i;
241 if (substr(i, N).equals_lower(Str))
242 return i;
243 }
244 return npos;
245}
246
Daniel Dunbar9806e4a2009-11-11 00:28:53 +0000247/// find_first_of - Find the first character in the string that is in \arg
248/// Chars, or npos if not found.
249///
Benjamin Kramer08fd2cf2010-08-23 18:16:08 +0000250/// Note: O(size() + Chars.size())
Daniel Dunbar9806e4a2009-11-11 00:28:53 +0000251StringRef::size_type StringRef::find_first_of(StringRef Chars,
252 size_t From) const {
Benjamin Kramer08fd2cf2010-08-23 18:16:08 +0000253 std::bitset<1 << CHAR_BIT> CharBits;
254 for (size_type i = 0; i != Chars.size(); ++i)
255 CharBits.set((unsigned char)Chars[i]);
256
Craig Topper3ced27c2014-08-21 04:31:10 +0000257 for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
Benjamin Kramer08fd2cf2010-08-23 18:16:08 +0000258 if (CharBits.test((unsigned char)Data[i]))
Chris Lattner372a8ae2009-09-20 01:22:16 +0000259 return i;
260 return npos;
261}
262
263/// find_first_not_of - Find the first character in the string that is not
Daniel Dunbar9806e4a2009-11-11 00:28:53 +0000264/// \arg C or npos if not found.
265StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
Craig Topper3ced27c2014-08-21 04:31:10 +0000266 for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
Daniel Dunbar9806e4a2009-11-11 00:28:53 +0000267 if (Data[i] != C)
268 return i;
269 return npos;
270}
271
272/// find_first_not_of - Find the first character in the string that is not
273/// in the string \arg Chars, or npos if not found.
274///
Benjamin Kramer08fd2cf2010-08-23 18:16:08 +0000275/// Note: O(size() + Chars.size())
Daniel Dunbar9806e4a2009-11-11 00:28:53 +0000276StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
277 size_t From) const {
Benjamin Kramer08fd2cf2010-08-23 18:16:08 +0000278 std::bitset<1 << CHAR_BIT> CharBits;
279 for (size_type i = 0; i != Chars.size(); ++i)
280 CharBits.set((unsigned char)Chars[i]);
281
Craig Topper3ced27c2014-08-21 04:31:10 +0000282 for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
Benjamin Kramer08fd2cf2010-08-23 18:16:08 +0000283 if (!CharBits.test((unsigned char)Data[i]))
Chris Lattner372a8ae2009-09-20 01:22:16 +0000284 return i;
285 return npos;
286}
287
Michael J. Spencere1d3603d2010-11-30 23:27:35 +0000288/// find_last_of - Find the last character in the string that is in \arg C,
289/// or npos if not found.
290///
291/// Note: O(size() + Chars.size())
292StringRef::size_type StringRef::find_last_of(StringRef Chars,
293 size_t From) const {
294 std::bitset<1 << CHAR_BIT> CharBits;
295 for (size_type i = 0; i != Chars.size(); ++i)
296 CharBits.set((unsigned char)Chars[i]);
297
Craig Topper3ced27c2014-08-21 04:31:10 +0000298 for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
Michael J. Spencere1d3603d2010-11-30 23:27:35 +0000299 if (CharBits.test((unsigned char)Data[i]))
300 return i;
301 return npos;
302}
Chris Lattner372a8ae2009-09-20 01:22:16 +0000303
Michael J. Spencer93303812012-05-11 22:08:50 +0000304/// find_last_not_of - Find the last character in the string that is not
305/// \arg C, or npos if not found.
306StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
Craig Topper3ced27c2014-08-21 04:31:10 +0000307 for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
Michael J. Spencer93303812012-05-11 22:08:50 +0000308 if (Data[i] != C)
309 return i;
310 return npos;
311}
312
313/// find_last_not_of - Find the last character in the string that is not in
314/// \arg Chars, or npos if not found.
315///
316/// Note: O(size() + Chars.size())
317StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
318 size_t From) const {
319 std::bitset<1 << CHAR_BIT> CharBits;
320 for (size_type i = 0, e = Chars.size(); i != e; ++i)
321 CharBits.set((unsigned char)Chars[i]);
322
Craig Topper3ced27c2014-08-21 04:31:10 +0000323 for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
Michael J. Spencer93303812012-05-11 22:08:50 +0000324 if (!CharBits.test((unsigned char)Data[i]))
325 return i;
326 return npos;
327}
328
Duncan Sands8570b292012-02-21 12:00:25 +0000329void StringRef::split(SmallVectorImpl<StringRef> &A,
Chandler Carruth4425c912015-09-10 07:51:37 +0000330 StringRef Separator, int MaxSplit,
Duncan Sands8570b292012-02-21 12:00:25 +0000331 bool KeepEmpty) const {
Chandler Carruth4425c912015-09-10 07:51:37 +0000332 StringRef S = *this;
Duncan Sands8570b292012-02-21 12:00:25 +0000333
Chandler Carruth4425c912015-09-10 07:51:37 +0000334 // Count down from MaxSplit. When MaxSplit is -1, this will just split
335 // "forever". This doesn't support splitting more than 2^31 times
336 // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
337 // but that seems unlikely to be useful.
338 while (MaxSplit-- != 0) {
339 size_t Idx = S.find(Separator);
340 if (Idx == npos)
341 break;
Duncan Sands8570b292012-02-21 12:00:25 +0000342
Chandler Carruth4425c912015-09-10 07:51:37 +0000343 // Push this split.
344 if (KeepEmpty || Idx > 0)
345 A.push_back(S.slice(0, Idx));
346
347 // Jump forward.
348 S = S.slice(Idx + Separator.size(), npos);
Duncan Sands8570b292012-02-21 12:00:25 +0000349 }
Chandler Carruth4425c912015-09-10 07:51:37 +0000350
351 // Push the tail.
352 if (KeepEmpty || !S.empty())
353 A.push_back(S);
Duncan Sands8570b292012-02-21 12:00:25 +0000354}
355
Chandler Carruth47712172015-09-10 06:07:03 +0000356void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
357 int MaxSplit, bool KeepEmpty) const {
Chandler Carruth4425c912015-09-10 07:51:37 +0000358 StringRef S = *this;
Chandler Carruth47712172015-09-10 06:07:03 +0000359
Chandler Carruth4425c912015-09-10 07:51:37 +0000360 // Count down from MaxSplit. When MaxSplit is -1, this will just split
361 // "forever". This doesn't support splitting more than 2^31 times
362 // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
363 // but that seems unlikely to be useful.
364 while (MaxSplit-- != 0) {
365 size_t Idx = S.find(Separator);
366 if (Idx == npos)
367 break;
Chandler Carruth47712172015-09-10 06:07:03 +0000368
Chandler Carruth4425c912015-09-10 07:51:37 +0000369 // Push this split.
370 if (KeepEmpty || Idx > 0)
371 A.push_back(S.slice(0, Idx));
372
373 // Jump forward.
374 S = S.slice(Idx + 1, npos);
Chandler Carruth47712172015-09-10 06:07:03 +0000375 }
Chandler Carruth4425c912015-09-10 07:51:37 +0000376
377 // Push the tail.
378 if (KeepEmpty || !S.empty())
379 A.push_back(S);
Chandler Carruth47712172015-09-10 06:07:03 +0000380}
381
Chris Lattner372a8ae2009-09-20 01:22:16 +0000382//===----------------------------------------------------------------------===//
383// Helpful Algorithms
384//===----------------------------------------------------------------------===//
385
386/// count - Return the number of non-overlapped occurrences of \arg Str in
387/// the string.
Daniel Dunbarad36e8a2009-11-06 10:58:06 +0000388size_t StringRef::count(StringRef Str) const {
Chris Lattner372a8ae2009-09-20 01:22:16 +0000389 size_t Count = 0;
390 size_t N = Str.size();
391 if (N > Length)
392 return 0;
393 for (size_t i = 0, e = Length - N + 1; i != e; ++i)
394 if (substr(i, N).equals(Str))
395 ++Count;
396 return Count;
397}
398
John McCall512b6502010-02-28 09:55:58 +0000399static unsigned GetAutoSenseRadix(StringRef &Str) {
Zachary Turnerd5d57632016-09-22 15:55:05 +0000400 if (Str.empty())
401 return 10;
402
Colin LeMahieu01431462016-03-18 18:22:07 +0000403 if (Str.startswith("0x") || Str.startswith("0X")) {
John McCall512b6502010-02-28 09:55:58 +0000404 Str = Str.substr(2);
405 return 16;
Chris Lattner0a1bafe2012-04-21 22:03:05 +0000406 }
407
Colin LeMahieu01431462016-03-18 18:22:07 +0000408 if (Str.startswith("0b") || Str.startswith("0B")) {
John McCall512b6502010-02-28 09:55:58 +0000409 Str = Str.substr(2);
410 return 2;
John McCall512b6502010-02-28 09:55:58 +0000411 }
Chris Lattner0a1bafe2012-04-21 22:03:05 +0000412
413 if (Str.startswith("0o")) {
414 Str = Str.substr(2);
415 return 8;
416 }
417
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000418 if (Str[0] == '0' && Str.size() > 1 && ascii_isdigit(Str[1])) {
419 Str = Str.substr(1);
Chris Lattner0a1bafe2012-04-21 22:03:05 +0000420 return 8;
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000421 }
422
Chris Lattner0a1bafe2012-04-21 22:03:05 +0000423 return 10;
John McCall512b6502010-02-28 09:55:58 +0000424}
425
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000426bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix,
427 unsigned long long &Result) {
Chris Lattner68ee7002009-09-19 19:47:14 +0000428 // Autosense radix if not specified.
John McCall512b6502010-02-28 09:55:58 +0000429 if (Radix == 0)
430 Radix = GetAutoSenseRadix(Str);
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000431
Chris Lattner68ee7002009-09-19 19:47:14 +0000432 // Empty strings (after the radix autosense) are invalid.
433 if (Str.empty()) return true;
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000434
Chris Lattner68ee7002009-09-19 19:47:14 +0000435 // Parse all the bytes of the string given this radix. Watch for overflow.
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000436 StringRef Str2 = Str;
Chris Lattner68ee7002009-09-19 19:47:14 +0000437 Result = 0;
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000438 while (!Str2.empty()) {
Chris Lattner68ee7002009-09-19 19:47:14 +0000439 unsigned CharVal;
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000440 if (Str2[0] >= '0' && Str2[0] <= '9')
441 CharVal = Str2[0] - '0';
442 else if (Str2[0] >= 'a' && Str2[0] <= 'z')
443 CharVal = Str2[0] - 'a' + 10;
444 else if (Str2[0] >= 'A' && Str2[0] <= 'Z')
445 CharVal = Str2[0] - 'A' + 10;
Chris Lattner68ee7002009-09-19 19:47:14 +0000446 else
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000447 break;
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000448
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000449 // If the parsed value is larger than the integer radix, we cannot
450 // consume any more characters.
Chris Lattner68ee7002009-09-19 19:47:14 +0000451 if (CharVal >= Radix)
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000452 break;
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000453
Chris Lattner68ee7002009-09-19 19:47:14 +0000454 // Add in this character.
455 unsigned long long PrevResult = Result;
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000456 Result = Result * Radix + CharVal;
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000457
Nick Kledzik35c79da2012-10-02 20:01:48 +0000458 // Check for overflow by shifting back and seeing if bits were lost.
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000459 if (Result / Radix < PrevResult)
Chris Lattner68ee7002009-09-19 19:47:14 +0000460 return true;
461
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000462 Str2 = Str2.substr(1);
Chris Lattner68ee7002009-09-19 19:47:14 +0000463 }
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000464
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000465 // We consider the operation a failure if no characters were consumed
466 // successfully.
467 if (Str.size() == Str2.size())
468 return true;
469
470 Str = Str2;
Chris Lattner68ee7002009-09-19 19:47:14 +0000471 return false;
472}
473
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000474bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix,
475 long long &Result) {
Chris Lattner84c15272009-09-19 23:58:48 +0000476 unsigned long long ULLVal;
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000477
Chris Lattner84c15272009-09-19 23:58:48 +0000478 // Handle positive strings first.
Michael J. Spencercfa95f62012-03-10 23:02:54 +0000479 if (Str.empty() || Str.front() != '-') {
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000480 if (consumeUnsignedInteger(Str, Radix, ULLVal) ||
Chris Lattner84c15272009-09-19 23:58:48 +0000481 // Check for value so large it overflows a signed value.
482 (long long)ULLVal < 0)
483 return true;
484 Result = ULLVal;
485 return false;
486 }
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000487
Chris Lattner84c15272009-09-19 23:58:48 +0000488 // Get the positive part of the value.
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000489 StringRef Str2 = Str.drop_front(1);
490 if (consumeUnsignedInteger(Str2, Radix, ULLVal) ||
Chris Lattner84c15272009-09-19 23:58:48 +0000491 // Reject values so large they'd overflow as negative signed, but allow
492 // "-0". This negates the unsigned so that the negative isn't undefined
493 // on signed overflow.
494 (long long)-ULLVal > 0)
495 return true;
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000496
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000497 Str = Str2;
Chris Lattner84c15272009-09-19 23:58:48 +0000498 Result = -ULLVal;
499 return false;
500}
501
Zachary Turner65fd2fc2016-09-22 15:05:19 +0000502/// GetAsUnsignedInteger - Workhorse method that converts a integer character
503/// sequence of radix up to 36 to an unsigned long long value.
504bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
505 unsigned long long &Result) {
506 if (consumeUnsignedInteger(Str, Radix, Result))
507 return true;
508
509 // For getAsUnsignedInteger, we require the whole string to be consumed or
510 // else we consider it a failure.
511 return !Str.empty();
512}
513
514bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
515 long long &Result) {
516 if (consumeSignedInteger(Str, Radix, Result))
517 return true;
518
519 // For getAsSignedInteger, we require the whole string to be consumed or else
520 // we consider it a failure.
521 return !Str.empty();
522}
523
John McCall512b6502010-02-28 09:55:58 +0000524bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
525 StringRef Str = *this;
526
527 // Autosense radix if not specified.
528 if (Radix == 0)
529 Radix = GetAutoSenseRadix(Str);
530
531 assert(Radix > 1 && Radix <= 36);
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000532
John McCall512b6502010-02-28 09:55:58 +0000533 // Empty strings (after the radix autosense) are invalid.
534 if (Str.empty()) return true;
535
536 // Skip leading zeroes. This can be a significant improvement if
537 // it means we don't need > 64 bits.
538 while (!Str.empty() && Str.front() == '0')
539 Str = Str.substr(1);
540
541 // If it was nothing but zeroes....
542 if (Str.empty()) {
543 Result = APInt(64, 0);
544 return false;
545 }
546
547 // (Over-)estimate the required number of bits.
548 unsigned Log2Radix = 0;
549 while ((1U << Log2Radix) < Radix) Log2Radix++;
550 bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
551
552 unsigned BitWidth = Log2Radix * Str.size();
553 if (BitWidth < Result.getBitWidth())
554 BitWidth = Result.getBitWidth(); // don't shrink the result
Chris Lattner5e146662012-04-23 00:27:54 +0000555 else if (BitWidth > Result.getBitWidth())
Jay Foad583abbc2010-12-07 08:25:19 +0000556 Result = Result.zext(BitWidth);
John McCall512b6502010-02-28 09:55:58 +0000557
558 APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
559 if (!IsPowerOf2Radix) {
560 // These must have the same bit-width as Result.
561 RadixAP = APInt(BitWidth, Radix);
562 CharAP = APInt(BitWidth, 0);
563 }
564
565 // Parse all the bytes of the string given this radix.
566 Result = 0;
567 while (!Str.empty()) {
568 unsigned CharVal;
569 if (Str[0] >= '0' && Str[0] <= '9')
570 CharVal = Str[0]-'0';
571 else if (Str[0] >= 'a' && Str[0] <= 'z')
572 CharVal = Str[0]-'a'+10;
573 else if (Str[0] >= 'A' && Str[0] <= 'Z')
574 CharVal = Str[0]-'A'+10;
575 else
576 return true;
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000577
John McCall512b6502010-02-28 09:55:58 +0000578 // If the parsed value is larger than the integer radix, the string is
579 // invalid.
580 if (CharVal >= Radix)
581 return true;
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000582
John McCall512b6502010-02-28 09:55:58 +0000583 // Add in this character.
584 if (IsPowerOf2Radix) {
585 Result <<= Log2Radix;
586 Result |= CharVal;
587 } else {
588 Result *= RadixAP;
589 CharAP = CharVal;
590 Result += CharAP;
591 }
592
593 Str = Str.substr(1);
594 }
Michael J. Spencerf13f4422010-11-26 04:16:08 +0000595
John McCall512b6502010-02-28 09:55:58 +0000596 return false;
597}
Chandler Carruthca99ad32012-03-04 10:55:27 +0000598
Zachary Turner8bd42a12017-02-14 19:06:37 +0000599bool StringRef::getAsDouble(double &Result, bool AllowInexact) const {
600 APFloat F(0.0);
601 APFloat::opStatus Status =
602 F.convertFromString(*this, APFloat::rmNearestTiesToEven);
603 if (Status != APFloat::opOK) {
604 if (!AllowInexact || Status != APFloat::opInexact)
605 return true;
606 }
607
608 Result = F.convertToDouble();
609 return false;
610}
Chandler Carruthca99ad32012-03-04 10:55:27 +0000611
612// Implementation of StringRef hashing.
613hash_code llvm::hash_value(StringRef S) {
614 return hash_combine_range(S.begin(), S.end());
615}