Blame - lib/Support/StringRef.cpp - fp2-dev/platform/external/llvm

blob: 2b262dcd3fa71445425f61672760f65d07dc4e6b [file] [log] [blame]

Daniel Dunbar	e655128	2009-09-16 22:38:48 +0000	[diff] [blame]	1	//===-- StringRef.cpp - Lightweight String References ---------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9
				10	#include "llvm/ADT/StringRef.h"
John McCall	1e7ad39	2010-02-28 09:55:58 +0000	[diff] [blame]	11	#include "llvm/ADT/APInt.h"
Douglas Gregor	ad6b6da	2010-01-07 00:51:54 +0000	[diff] [blame]	12
Daniel Dunbar	e655128	2009-09-16 22:38:48 +0000	[diff] [blame]	13	using namespace llvm;
				14
Daniel Dunbar	77696be	2009-09-22 03:34:40 +0000	[diff] [blame]	15	// MSVC emits references to this into the translation units which reference it.
				16	#ifndef _MSC_VER
Daniel Dunbar	e655128	2009-09-16 22:38:48 +0000	[diff] [blame]	17	const size_t StringRef::npos;
Daniel Dunbar	77696be	2009-09-22 03:34:40 +0000	[diff] [blame]	18	#endif
Chris Lattner	cea1438	2009-09-19 19:47:14 +0000	[diff] [blame]	19
Benjamin Kramer	05872ea	2009-11-12 20:36:59 +0000	[diff] [blame]	20	static char ascii_tolower(char x) {
				21	if (x >= 'A' && x <= 'Z')
				22	return x - 'A' + 'a';
				23	return x;
				24	}
				25
				26	/// compare_lower - Compare strings, ignoring case.
				27	int StringRef::compare_lower(StringRef RHS) const {
Daniel Dunbar	58ce7ac	2009-11-19 18:53:18 +0000	[diff] [blame]	28	for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
Benjamin Kramer	05872ea	2009-11-12 20:36:59 +0000	[diff] [blame]	29	char LHC = ascii_tolower(Data[I]);
				30	char RHC = ascii_tolower(RHS.Data[I]);
				31	if (LHC != RHC)
				32	return LHC < RHC ? -1 : 1;
				33	}
				34
				35	if (Length == RHS.Length)
				36	return 0;
				37	return Length < RHS.Length ? -1 : 1;
				38	}
				39
Douglas Gregor	7e54d5b	2009-12-31 04:24:34 +0000	[diff] [blame]	40	// Compute the edit distance between the two given strings.
Douglas Gregor	441c8b4	2009-12-30 17:23:44 +0000	[diff] [blame]	41	unsigned StringRef::edit_distance(llvm::StringRef Other,
				42	bool AllowReplacements) {
Douglas Gregor	7e54d5b	2009-12-31 04:24:34 +0000	[diff] [blame]	43	// The algorithm implemented below is the "classic"
				44	// dynamic-programming algorithm for computing the Levenshtein
				45	// distance, which is described here:
				46	//
				47	// http://en.wikipedia.org/wiki/Levenshtein_distance
				48	//
				49	// Although the algorithm is typically described using an m x n
				50	// array, only two rows are used at a time, so this implemenation
				51	// just keeps two separate vectors for those two rows.
Douglas Gregor	441c8b4	2009-12-30 17:23:44 +0000	[diff] [blame]	52	size_type m = size();
				53	size_type n = Other.size();
				54
Douglas Gregor	2772ea8	2010-01-07 02:24:06 +0000	[diff] [blame]	55	const unsigned SmallBufferSize = 64;
				56	unsigned SmallBuffer[SmallBufferSize];
				57	unsigned *Allocated = 0;
				58	unsigned *previous = SmallBuffer;
				59	if (2*(n + 1) > SmallBufferSize)
				60	Allocated = previous = new unsigned [2*(n+1)];
				61	unsigned *current = previous + (n + 1);
Douglas Gregor	ad6b6da	2010-01-07 00:51:54 +0000	[diff] [blame]	62
				63	for (unsigned i = 0; i <= n; ++i)
Douglas Gregor	441c8b4	2009-12-30 17:23:44 +0000	[diff] [blame]	64	previous[i] = i;
				65
Douglas Gregor	441c8b4	2009-12-30 17:23:44 +0000	[diff] [blame]	66	for (size_type y = 1; y <= m; ++y) {
Douglas Gregor	441c8b4	2009-12-30 17:23:44 +0000	[diff] [blame]	67	current[0] = y;
				68	for (size_type x = 1; x <= n; ++x) {
				69	if (AllowReplacements) {
				70	current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
				71	min(current[x-1], previous[x])+1);
				72	}
				73	else {
				74	if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
				75	else current[x] = min(current[x-1], previous[x]) + 1;
				76	}
				77	}
Douglas Gregor	ad6b6da	2010-01-07 00:51:54 +0000	[diff] [blame]	78
				79	unsigned *tmp = current;
				80	current = previous;
				81	previous = tmp;
Douglas Gregor	441c8b4	2009-12-30 17:23:44 +0000	[diff] [blame]	82	}
				83
Douglas Gregor	ad6b6da	2010-01-07 00:51:54 +0000	[diff] [blame]	84	unsigned Result = previous[n];
Douglas Gregor	2772ea8	2010-01-07 02:24:06 +0000	[diff] [blame]	85	delete [] Allocated;
Douglas Gregor	ad6b6da	2010-01-07 00:51:54 +0000	[diff] [blame]	86
				87	return Result;
Douglas Gregor	441c8b4	2009-12-30 17:23:44 +0000	[diff] [blame]	88	}
				89
Chris Lattner	05a32c8	2009-09-20 01:22:16 +0000	[diff] [blame]	90	//===----------------------------------------------------------------------===//
				91	// String Searching
				92	//===----------------------------------------------------------------------===//
				93
				94
				95	/// find - Search for the first string \arg Str in the string.
				96	///
				97	/// \return - The index of the first occurence of \arg Str, or npos if not
				98	/// found.
Daniel Dunbar	64066bd	2009-11-11 00:28:53 +0000	[diff] [blame]	99	size_t StringRef::find(StringRef Str, size_t From) const {
Chris Lattner	05a32c8	2009-09-20 01:22:16 +0000	[diff] [blame]	100	size_t N = Str.size();
				101	if (N > Length)
				102	return npos;
Daniel Dunbar	58ce7ac	2009-11-19 18:53:18 +0000	[diff] [blame]	103	for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
Chris Lattner	05a32c8	2009-09-20 01:22:16 +0000	[diff] [blame]	104	if (substr(i, N).equals(Str))
				105	return i;
				106	return npos;
				107	}
				108
				109	/// rfind - Search for the last string \arg Str in the string.
				110	///
				111	/// \return - The index of the last occurence of \arg Str, or npos if not
				112	/// found.
Daniel Dunbar	2928c83	2009-11-06 10:58:06 +0000	[diff] [blame]	113	size_t StringRef::rfind(StringRef Str) const {
Chris Lattner	05a32c8	2009-09-20 01:22:16 +0000	[diff] [blame]	114	size_t N = Str.size();
				115	if (N > Length)
				116	return npos;
				117	for (size_t i = Length - N + 1, e = 0; i != e;) {
				118	--i;
				119	if (substr(i, N).equals(Str))
				120	return i;
				121	}
				122	return npos;
				123	}
				124
Daniel Dunbar	64066bd	2009-11-11 00:28:53 +0000	[diff] [blame]	125	/// find_first_of - Find the first character in the string that is in \arg
				126	/// Chars, or npos if not found.
				127	///
				128	/// Note: O(size() * Chars.size())
				129	StringRef::size_type StringRef::find_first_of(StringRef Chars,
				130	size_t From) const {
Daniel Dunbar	58ce7ac	2009-11-19 18:53:18 +0000	[diff] [blame]	131	for (size_type i = min(From, Length), e = Length; i != e; ++i)
Chris Lattner	05a32c8	2009-09-20 01:22:16 +0000	[diff] [blame]	132	if (Chars.find(Data[i]) != npos)
				133	return i;
				134	return npos;
				135	}
				136
				137	/// find_first_not_of - Find the first character in the string that is not
Daniel Dunbar	64066bd	2009-11-11 00:28:53 +0000	[diff] [blame]	138	/// \arg C or npos if not found.
				139	StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
Daniel Dunbar	58ce7ac	2009-11-19 18:53:18 +0000	[diff] [blame]	140	for (size_type i = min(From, Length), e = Length; i != e; ++i)
Daniel Dunbar	64066bd	2009-11-11 00:28:53 +0000	[diff] [blame]	141	if (Data[i] != C)
				142	return i;
				143	return npos;
				144	}
				145
				146	/// find_first_not_of - Find the first character in the string that is not
				147	/// in the string \arg Chars, or npos if not found.
				148	///
				149	/// Note: O(size() * Chars.size())
				150	StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
				151	size_t From) const {
Daniel Dunbar	58ce7ac	2009-11-19 18:53:18 +0000	[diff] [blame]	152	for (size_type i = min(From, Length), e = Length; i != e; ++i)
Chris Lattner	05a32c8	2009-09-20 01:22:16 +0000	[diff] [blame]	153	if (Chars.find(Data[i]) == npos)
				154	return i;
				155	return npos;
				156	}
				157
				158
				159	//===----------------------------------------------------------------------===//
				160	// Helpful Algorithms
				161	//===----------------------------------------------------------------------===//
				162
				163	/// count - Return the number of non-overlapped occurrences of \arg Str in
				164	/// the string.
Daniel Dunbar	2928c83	2009-11-06 10:58:06 +0000	[diff] [blame]	165	size_t StringRef::count(StringRef Str) const {
Chris Lattner	05a32c8	2009-09-20 01:22:16 +0000	[diff] [blame]	166	size_t Count = 0;
				167	size_t N = Str.size();
				168	if (N > Length)
				169	return 0;
				170	for (size_t i = 0, e = Length - N + 1; i != e; ++i)
				171	if (substr(i, N).equals(Str))
				172	++Count;
				173	return Count;
				174	}
				175
John McCall	1e7ad39	2010-02-28 09:55:58 +0000	[diff] [blame]	176	static unsigned GetAutoSenseRadix(StringRef &Str) {
				177	if (Str.startswith("0x")) {
				178	Str = Str.substr(2);
				179	return 16;
				180	} else if (Str.startswith("0b")) {
				181	Str = Str.substr(2);
				182	return 2;
				183	} else if (Str.startswith("0")) {
				184	return 8;
				185	} else {
				186	return 10;
				187	}
				188	}
				189
				190
Chris Lattner	63c6b7d	2009-09-19 23:58:48 +0000	[diff] [blame]	191	/// GetAsUnsignedInteger - Workhorse method that converts a integer character
				192	/// sequence of radix up to 36 to an unsigned long long value.
Chris Lattner	cea1438	2009-09-19 19:47:14 +0000	[diff] [blame]	193	static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
				194	unsigned long long &Result) {
				195	// Autosense radix if not specified.
John McCall	1e7ad39	2010-02-28 09:55:58 +0000	[diff] [blame]	196	if (Radix == 0)
				197	Radix = GetAutoSenseRadix(Str);
Chris Lattner	cea1438	2009-09-19 19:47:14 +0000	[diff] [blame]	198
				199	// Empty strings (after the radix autosense) are invalid.
				200	if (Str.empty()) return true;
				201
				202	// Parse all the bytes of the string given this radix. Watch for overflow.
				203	Result = 0;
				204	while (!Str.empty()) {
				205	unsigned CharVal;
				206	if (Str[0] >= '0' && Str[0] <= '9')
				207	CharVal = Str[0]-'0';
				208	else if (Str[0] >= 'a' && Str[0] <= 'z')
				209	CharVal = Str[0]-'a'+10;
				210	else if (Str[0] >= 'A' && Str[0] <= 'Z')
				211	CharVal = Str[0]-'A'+10;
				212	else
				213	return true;
				214
				215	// If the parsed value is larger than the integer radix, the string is
				216	// invalid.
				217	if (CharVal >= Radix)
				218	return true;
				219
				220	// Add in this character.
				221	unsigned long long PrevResult = Result;
				222	Result = Result*Radix+CharVal;
				223
				224	// Check for overflow.
				225	if (Result < PrevResult)
				226	return true;
				227
				228	Str = Str.substr(1);
				229	}
				230
				231	return false;
				232	}
				233
				234	bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
				235	return GetAsUnsignedInteger(*this, Radix, Result);
				236	}
				237
Chris Lattner	63c6b7d	2009-09-19 23:58:48 +0000	[diff] [blame]	238
				239	bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
				240	unsigned long long ULLVal;
				241
				242	// Handle positive strings first.
				243	if (empty() \|\| front() != '-') {
				244	if (GetAsUnsignedInteger(*this, Radix, ULLVal) \|\|
				245	// Check for value so large it overflows a signed value.
				246	(long long)ULLVal < 0)
				247	return true;
				248	Result = ULLVal;
				249	return false;
				250	}
				251
				252	// Get the positive part of the value.
				253	if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) \|\|
				254	// Reject values so large they'd overflow as negative signed, but allow
				255	// "-0". This negates the unsigned so that the negative isn't undefined
				256	// on signed overflow.
				257	(long long)-ULLVal > 0)
				258	return true;
				259
				260	Result = -ULLVal;
				261	return false;
				262	}
				263
				264	bool StringRef::getAsInteger(unsigned Radix, int &Result) const {
				265	long long Val;
				266	if (getAsInteger(Radix, Val) \|\|
				267	(int)Val != Val)
				268	return true;
				269	Result = Val;
				270	return false;
				271	}
				272
				273	bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
				274	unsigned long long Val;
				275	if (getAsInteger(Radix, Val) \|\|
				276	(unsigned)Val != Val)
				277	return true;
				278	Result = Val;
				279	return false;
				280	}
John McCall	1e7ad39	2010-02-28 09:55:58 +0000	[diff] [blame]	281
				282	bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
				283	StringRef Str = *this;
				284
				285	// Autosense radix if not specified.
				286	if (Radix == 0)
				287	Radix = GetAutoSenseRadix(Str);
				288
				289	assert(Radix > 1 && Radix <= 36);
				290
				291	// Empty strings (after the radix autosense) are invalid.
				292	if (Str.empty()) return true;
				293
				294	// Skip leading zeroes. This can be a significant improvement if
				295	// it means we don't need > 64 bits.
				296	while (!Str.empty() && Str.front() == '0')
				297	Str = Str.substr(1);
				298
				299	// If it was nothing but zeroes....
				300	if (Str.empty()) {
				301	Result = APInt(64, 0);
				302	return false;
				303	}
				304
				305	// (Over-)estimate the required number of bits.
				306	unsigned Log2Radix = 0;
				307	while ((1U << Log2Radix) < Radix) Log2Radix++;
				308	bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
				309
				310	unsigned BitWidth = Log2Radix * Str.size();
				311	if (BitWidth < Result.getBitWidth())
				312	BitWidth = Result.getBitWidth(); // don't shrink the result
				313	else
				314	Result.zext(BitWidth);
				315
				316	APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
				317	if (!IsPowerOf2Radix) {
				318	// These must have the same bit-width as Result.
				319	RadixAP = APInt(BitWidth, Radix);
				320	CharAP = APInt(BitWidth, 0);
				321	}
				322
				323	// Parse all the bytes of the string given this radix.
				324	Result = 0;
				325	while (!Str.empty()) {
				326	unsigned CharVal;
				327	if (Str[0] >= '0' && Str[0] <= '9')
				328	CharVal = Str[0]-'0';
				329	else if (Str[0] >= 'a' && Str[0] <= 'z')
				330	CharVal = Str[0]-'a'+10;
				331	else if (Str[0] >= 'A' && Str[0] <= 'Z')
				332	CharVal = Str[0]-'A'+10;
				333	else
				334	return true;
				335
				336	// If the parsed value is larger than the integer radix, the string is
				337	// invalid.
				338	if (CharVal >= Radix)
				339	return true;
				340
				341	// Add in this character.
				342	if (IsPowerOf2Radix) {
				343	Result <<= Log2Radix;
				344	Result \|= CharVal;
				345	} else {
				346	Result *= RadixAP;
				347	CharAP = CharVal;
				348	Result += CharAP;
				349	}
				350
				351	Str = Str.substr(1);
				352	}
				353
				354	return false;
				355	}