Blame - lib/Lex/LiteralSupport.cpp - fp2-dev/platform/external/clang

blob: 8c2f2aff47478cbec7fcd00d406523451140925c [file] [log] [blame]

Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	1	//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
Chris Lattner	0bc735f	2007-12-29 19:59:25 +0000	[diff] [blame]	5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file implements the NumericLiteralParser, CharLiteralParser, and
				11	// StringLiteralParser interfaces.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "clang/Lex/LiteralSupport.h"
				16	#include "clang/Lex/Preprocessor.h"
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	17	#include "clang/Basic/Diagnostic.h"
Chris Lattner	136f93a	2007-07-16 06:55:01 +0000	[diff] [blame]	18	#include "clang/Basic/TargetInfo.h"
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	19	#include "llvm/ADT/StringExtras.h"
				20	using namespace clang;
				21
				22	/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
				23	/// not valid.
				24	static int HexDigitValue(char C) {
				25	if (C >= '0' && C <= '9') return C-'0';
				26	if (C >= 'a' && C <= 'f') return C-'a'+10;
				27	if (C >= 'A' && C <= 'F') return C-'A'+10;
				28	return -1;
				29	}
				30
				31	/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
				32	/// either a character or a string literal.
				33	static unsigned ProcessCharEscape(const char *&ThisTokBuf,
				34	const char *ThisTokEnd, bool &HadError,
				35	SourceLocation Loc, bool IsWide,
				36	Preprocessor &PP) {
				37	// Skip the '\' char.
				38	++ThisTokBuf;
				39
				40	// We know that this character can't be off the end of the buffer, because
				41	// that would have been \", which would not have been the end of string.
				42	unsigned ResultChar = *ThisTokBuf++;
				43	switch (ResultChar) {
				44	// These map to themselves.
				45	case '\\': case '\'': case '"': case '?': break;
				46
				47	// These have fixed mappings.
				48	case 'a':
				49	// TODO: K&R: the meaning of '\\a' is different in traditional C
				50	ResultChar = 7;
				51	break;
				52	case 'b':
				53	ResultChar = 8;
				54	break;
				55	case 'e':
				56	PP.Diag(Loc, diag::ext_nonstandard_escape, "e");
				57	ResultChar = 27;
				58	break;
				59	case 'f':
				60	ResultChar = 12;
				61	break;
				62	case 'n':
				63	ResultChar = 10;
				64	break;
				65	case 'r':
				66	ResultChar = 13;
				67	break;
				68	case 't':
				69	ResultChar = 9;
				70	break;
				71	case 'v':
				72	ResultChar = 11;
				73	break;
				74
				75	//case 'u': case 'U': // FIXME: UCNs.
				76	case 'x': { // Hex escape.
				77	ResultChar = 0;
				78	if (ThisTokBuf == ThisTokEnd \|\| !isxdigit(*ThisTokBuf)) {
				79	PP.Diag(Loc, diag::err_hex_escape_no_digits);
				80	HadError = 1;
				81	break;
				82	}
				83
				84	// Hex escapes are a maximal series of hex digits.
				85	bool Overflow = false;
				86	for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
				87	int CharVal = HexDigitValue(ThisTokBuf[0]);
				88	if (CharVal == -1) break;
Chris Lattner	b812814	2007-09-03 18:28:41 +0000	[diff] [blame]	89	Overflow \|= (ResultChar & 0xF0000000) ? true : false; // About to shift out a digit?
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	90	ResultChar <<= 4;
				91	ResultChar \|= CharVal;
				92	}
				93
				94	// See if any bits will be truncated when evaluated as a character.
Chris Lattner	98be494	2008-03-05 18:54:05 +0000	[diff] [blame]	95	unsigned CharWidth = PP.getTargetInfo().getCharWidth(IsWide);
Ted Kremenek	9c728dc	2007-12-12 22:39:36 +0000	[diff] [blame]	96
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	97	if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
				98	Overflow = true;
				99	ResultChar &= ~0U >> (32-CharWidth);
				100	}
				101
				102	// Check for overflow.
				103	if (Overflow) // Too many digits to fit in
				104	PP.Diag(Loc, diag::warn_hex_escape_too_large);
				105	break;
				106	}
				107	case '0': case '1': case '2': case '3':
				108	case '4': case '5': case '6': case '7': {
				109	// Octal escapes.
				110	--ThisTokBuf;
				111	ResultChar = 0;
				112
				113	// Octal escapes are a series of octal digits with maximum length 3.
				114	// "\0123" is a two digit sequence equal to "\012" "3".
				115	unsigned NumDigits = 0;
				116	do {
				117	ResultChar <<= 3;
				118	ResultChar \|= *ThisTokBuf++ - '0';
				119	++NumDigits;
				120	} while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
				121	ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
				122
				123	// Check for overflow. Reject '\777', but not L'\777'.
Chris Lattner	98be494	2008-03-05 18:54:05 +0000	[diff] [blame]	124	unsigned CharWidth = PP.getTargetInfo().getCharWidth(IsWide);
Ted Kremenek	9c728dc	2007-12-12 22:39:36 +0000	[diff] [blame]	125
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	126	if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
				127	PP.Diag(Loc, diag::warn_octal_escape_too_large);
				128	ResultChar &= ~0U >> (32-CharWidth);
				129	}
				130	break;
				131	}
				132
				133	// Otherwise, these are not valid escapes.
				134	case '(': case '{': case '[': case '%':
				135	// GCC accepts these as extensions. We warn about them as such though.
				136	if (!PP.getLangOptions().NoExtensions) {
				137	PP.Diag(Loc, diag::ext_nonstandard_escape,
				138	std::string()+(char)ResultChar);
				139	break;
				140	}
				141	// FALL THROUGH.
				142	default:
				143	if (isgraph(ThisTokBuf[0])) {
				144	PP.Diag(Loc, diag::ext_unknown_escape, std::string()+(char)ResultChar);
				145	} else {
				146	PP.Diag(Loc, diag::ext_unknown_escape, "x"+llvm::utohexstr(ResultChar));
				147	}
				148	break;
				149	}
				150
				151	return ResultChar;
				152	}
				153
				154
				155
				156
				157	/// integer-constant: [C99 6.4.4.1]
				158	/// decimal-constant integer-suffix
				159	/// octal-constant integer-suffix
				160	/// hexadecimal-constant integer-suffix
				161	/// decimal-constant:
				162	/// nonzero-digit
				163	/// decimal-constant digit
				164	/// octal-constant:
				165	/// 0
				166	/// octal-constant octal-digit
				167	/// hexadecimal-constant:
				168	/// hexadecimal-prefix hexadecimal-digit
				169	/// hexadecimal-constant hexadecimal-digit
				170	/// hexadecimal-prefix: one of
				171	/// 0x 0X
				172	/// integer-suffix:
				173	/// unsigned-suffix [long-suffix]
				174	/// unsigned-suffix [long-long-suffix]
				175	/// long-suffix [unsigned-suffix]
				176	/// long-long-suffix [unsigned-sufix]
				177	/// nonzero-digit:
				178	/// 1 2 3 4 5 6 7 8 9
				179	/// octal-digit:
				180	/// 0 1 2 3 4 5 6 7
				181	/// hexadecimal-digit:
				182	/// 0 1 2 3 4 5 6 7 8 9
				183	/// a b c d e f
				184	/// A B C D E F
				185	/// unsigned-suffix: one of
				186	/// u U
				187	/// long-suffix: one of
				188	/// l L
				189	/// long-long-suffix: one of
				190	/// ll LL
				191	///
				192	/// floating-constant: [C99 6.4.4.2]
				193	/// TODO: add rules...
				194	///
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	195	NumericLiteralParser::
				196	NumericLiteralParser(const char begin, const char end,
				197	SourceLocation TokLoc, Preprocessor &pp)
				198	: PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {
				199	s = DigitsBegin = begin;
				200	saw_exponent = false;
				201	saw_period = false;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	202	isLong = false;
				203	isUnsigned = false;
				204	isLongLong = false;
Chris Lattner	6e400c2	2007-08-26 03:29:23 +0000	[diff] [blame]	205	isFloat = false;
Chris Lattner	506b8de	2007-08-26 01:58:14 +0000	[diff] [blame]	206	isImaginary = false;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	207	hadError = false;
				208
				209	if (*s == '0') { // parse radix
Chris Lattner	368328c	2008-06-30 06:39:54 +0000	[diff] [blame]	210	ParseNumberStartingWithZero(TokLoc);
				211	if (hadError)
				212	return;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	213	} else { // the first digit is non-zero
				214	radix = 10;
				215	s = SkipDigits(s);
				216	if (s == ThisTokEnd) {
				217	// Done.
Christopher Lamb	016765e	2007-11-29 06:06:27 +0000	[diff] [blame]	218	} else if (isxdigit(s) && !(s == 'e' \|\| *s == 'E')) {
Chris Lattner	0b7f69d	2008-04-20 18:41:46 +0000	[diff] [blame]	219	Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
				220	diag::err_invalid_decimal_digit, std::string(s, s+1));
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	221	return;
				222	} else if (*s == '.') {
				223	s++;
				224	saw_period = true;
				225	s = SkipDigits(s);
				226	}
Eli Friedman	6f7adbd	2008-09-02 05:29:22 +0000	[diff] [blame]	227	if (s != ThisTokEnd && (s == 'e' \|\| s == 'E')) { // exponent
Chris Lattner	70f66ab	2008-04-20 18:47:55 +0000	[diff] [blame]	228	const char *Exponent = s;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	229	s++;
				230	saw_exponent = true;
				231	if (s == '+' \|\| s == '-') s++; // sign
				232	const char *first_non_digit = SkipDigits(s);
Chris Lattner	0b7f69d	2008-04-20 18:41:46 +0000	[diff] [blame]	233	if (first_non_digit != s) {
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	234	s = first_non_digit;
Chris Lattner	0b7f69d	2008-04-20 18:41:46 +0000	[diff] [blame]	235	} else {
Chris Lattner	70f66ab	2008-04-20 18:47:55 +0000	[diff] [blame]	236	Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-begin),
Chris Lattner	0b7f69d	2008-04-20 18:41:46 +0000	[diff] [blame]	237	diag::err_exponent_has_no_digits);
				238	return;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	239	}
				240	}
				241	}
				242
				243	SuffixBegin = s;
Chris Lattner	506b8de	2007-08-26 01:58:14 +0000	[diff] [blame]	244
				245	// Parse the suffix. At this point we can classify whether we have an FP or
				246	// integer constant.
				247	bool isFPConstant = isFloatingLiteral();
				248
				249	// Loop over all of the characters of the suffix. If we see something bad,
				250	// we break out of the loop.
				251	for (; s != ThisTokEnd; ++s) {
				252	switch (*s) {
				253	case 'f': // FP Suffix for "float"
				254	case 'F':
				255	if (!isFPConstant) break; // Error for integer constant.
Chris Lattner	6e400c2	2007-08-26 03:29:23 +0000	[diff] [blame]	256	if (isFloat \|\| isLong) break; // FF, LF invalid.
				257	isFloat = true;
Chris Lattner	506b8de	2007-08-26 01:58:14 +0000	[diff] [blame]	258	continue; // Success.
				259	case 'u':
				260	case 'U':
				261	if (isFPConstant) break; // Error for floating constant.
				262	if (isUnsigned) break; // Cannot be repeated.
				263	isUnsigned = true;
				264	continue; // Success.
				265	case 'l':
				266	case 'L':
				267	if (isLong \|\| isLongLong) break; // Cannot be repeated.
Chris Lattner	6e400c2	2007-08-26 03:29:23 +0000	[diff] [blame]	268	if (isFloat) break; // LF invalid.
Chris Lattner	506b8de	2007-08-26 01:58:14 +0000	[diff] [blame]	269
				270	// Check for long long. The L's need to be adjacent and the same case.
				271	if (s+1 != ThisTokEnd && s[1] == s[0]) {
				272	if (isFPConstant) break; // long long invalid for floats.
				273	isLongLong = true;
				274	++s; // Eat both of them.
				275	} else {
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	276	isLong = true;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	277	}
Chris Lattner	506b8de	2007-08-26 01:58:14 +0000	[diff] [blame]	278	continue; // Success.
				279	case 'i':
Steve Naroff	0c29b22	2008-04-04 21:02:54 +0000	[diff] [blame]	280	if (PP.getLangOptions().Microsoft) {
				281	// Allow i8, i16, i32, i64, and i128.
				282	if (++s == ThisTokEnd) break;
				283	switch (*s) {
				284	case '8':
				285	s++; // i8 suffix
				286	break;
				287	case '1':
				288	if (++s == ThisTokEnd) break;
				289	if (*s == '6') s++; // i16 suffix
				290	else if (*s == '2') {
				291	if (++s == ThisTokEnd) break;
				292	if (*s == '8') s++; // i128 suffix
				293	}
				294	break;
				295	case '3':
				296	if (++s == ThisTokEnd) break;
				297	if (*s == '2') s++; // i32 suffix
				298	break;
				299	case '6':
				300	if (++s == ThisTokEnd) break;
				301	if (*s == '4') s++; // i64 suffix
				302	break;
				303	default:
				304	break;
				305	}
				306	break;
				307	}
				308	// fall through.
Chris Lattner	506b8de	2007-08-26 01:58:14 +0000	[diff] [blame]	309	case 'I':
				310	case 'j':
				311	case 'J':
				312	if (isImaginary) break; // Cannot be repeated.
				313	PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
				314	diag::ext_imaginary_constant);
				315	isImaginary = true;
				316	continue; // Success.
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	317	}
Chris Lattner	506b8de	2007-08-26 01:58:14 +0000	[diff] [blame]	318	// If we reached here, there was an error.
				319	break;
				320	}
				321
				322	// Report an error if there are any.
				323	if (s != ThisTokEnd) {
Chris Lattner	0b7f69d	2008-04-20 18:41:46 +0000	[diff] [blame]	324	Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
				325	isFPConstant ? diag::err_invalid_suffix_float_constant :
				326	diag::err_invalid_suffix_integer_constant,
Chris Lattner	506b8de	2007-08-26 01:58:14 +0000	[diff] [blame]	327	std::string(SuffixBegin, ThisTokEnd));
				328	return;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	329	}
				330	}
				331
Chris Lattner	368328c	2008-06-30 06:39:54 +0000	[diff] [blame]	332	/// ParseNumberStartingWithZero - This method is called when the first character
				333	/// of the number is found to be a zero. This means it is either an octal
				334	/// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
				335	/// a floating point number (01239.123e4). Eat the prefix, determining the
				336	/// radix etc.
				337	void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
				338	assert(s[0] == '0' && "Invalid method call");
				339	s++;
				340
				341	// Handle a hex number like 0x1234.
				342	if ((s == 'x' \|\| s == 'X') && (isxdigit(s[1]) \|\| s[1] == '.')) {
				343	s++;
				344	radix = 16;
				345	DigitsBegin = s;
				346	s = SkipHexDigits(s);
				347	if (s == ThisTokEnd) {
				348	// Done.
				349	} else if (*s == '.') {
				350	s++;
				351	saw_period = true;
				352	s = SkipHexDigits(s);
				353	}
				354	// A binary exponent can appear with or with a '.'. If dotted, the
				355	// binary exponent is required.
Chris Lattner	6ea6238	2008-07-25 18:18:34 +0000	[diff] [blame]	356	if (s == 'p' \|\| s == 'P') {
Chris Lattner	368328c	2008-06-30 06:39:54 +0000	[diff] [blame]	357	const char *Exponent = s;
				358	s++;
				359	saw_exponent = true;
				360	if (s == '+' \|\| s == '-') s++; // sign
				361	const char *first_non_digit = SkipDigits(s);
Chris Lattner	6ea6238	2008-07-25 18:18:34 +0000	[diff] [blame]	362	if (first_non_digit == s) {
Chris Lattner	368328c	2008-06-30 06:39:54 +0000	[diff] [blame]	363	Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
				364	diag::err_exponent_has_no_digits);
Chris Lattner	6ea6238	2008-07-25 18:18:34 +0000	[diff] [blame]	365	return;
Chris Lattner	368328c	2008-06-30 06:39:54 +0000	[diff] [blame]	366	}
Chris Lattner	6ea6238	2008-07-25 18:18:34 +0000	[diff] [blame]	367	s = first_non_digit;
				368
				369	if (!PP.getLangOptions().HexFloats)
				370	Diag(TokLoc, diag::ext_hexconstant_invalid);
Chris Lattner	368328c	2008-06-30 06:39:54 +0000	[diff] [blame]	371	} else if (saw_period) {
				372	Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
				373	diag::err_hexconstant_requires_exponent);
				374	}
				375	return;
				376	}
				377
				378	// Handle simple binary numbers 0b01010
				379	if (s == 'b' \|\| s == 'B') {
				380	// 0b101010 is a GCC extension.
Chris Lattner	413d355	2008-06-30 06:44:49 +0000	[diff] [blame]	381	PP.Diag(TokLoc, diag::ext_binary_literal);
Chris Lattner	368328c	2008-06-30 06:39:54 +0000	[diff] [blame]	382	++s;
				383	radix = 2;
				384	DigitsBegin = s;
				385	s = SkipBinaryDigits(s);
				386	if (s == ThisTokEnd) {
				387	// Done.
				388	} else if (isxdigit(*s)) {
				389	Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
				390	diag::err_invalid_binary_digit, std::string(s, s+1));
Chris Lattner	368328c	2008-06-30 06:39:54 +0000	[diff] [blame]	391	}
Chris Lattner	413d355	2008-06-30 06:44:49 +0000	[diff] [blame]	392	// Other suffixes will be diagnosed by the caller.
Chris Lattner	368328c	2008-06-30 06:39:54 +0000	[diff] [blame]	393	return;
				394	}
				395
				396	// For now, the radix is set to 8. If we discover that we have a
				397	// floating point constant, the radix will change to 10. Octal floating
				398	// point constants are not permitted (only decimal and hexadecimal).
				399	radix = 8;
				400	DigitsBegin = s;
				401	s = SkipOctalDigits(s);
				402	if (s == ThisTokEnd)
				403	return; // Done, simple octal number like 01234
				404
Chris Lattner	413d355	2008-06-30 06:44:49 +0000	[diff] [blame]	405	// If we have some other non-octal digit that is a decimal digit, see if
				406	// this is part of a floating point number like 094.123 or 09e1.
				407	if (isdigit(*s)) {
				408	const char *EndDecimal = SkipDigits(s);
				409	if (EndDecimal[0] == '.' \|\| EndDecimal[0] == 'e' \|\| EndDecimal[0] == 'E') {
				410	s = EndDecimal;
				411	radix = 10;
				412	}
				413	}
				414
				415	// If we have a hex digit other than 'e' (which denotes a FP exponent) then
				416	// the code is using an incorrect base.
Chris Lattner	368328c	2008-06-30 06:39:54 +0000	[diff] [blame]	417	if (isxdigit(s) && s != 'e' && *s != 'E') {
				418	Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
				419	diag::err_invalid_octal_digit, std::string(s, s+1));
				420	return;
				421	}
				422
				423	if (*s == '.') {
				424	s++;
				425	radix = 10;
				426	saw_period = true;
Chris Lattner	413d355	2008-06-30 06:44:49 +0000	[diff] [blame]	427	s = SkipDigits(s); // Skip suffix.
Chris Lattner	368328c	2008-06-30 06:39:54 +0000	[diff] [blame]	428	}
				429	if (s == 'e' \|\| s == 'E') { // exponent
				430	const char *Exponent = s;
				431	s++;
				432	radix = 10;
				433	saw_exponent = true;
				434	if (s == '+' \|\| s == '-') s++; // sign
				435	const char *first_non_digit = SkipDigits(s);
				436	if (first_non_digit != s) {
				437	s = first_non_digit;
				438	} else {
				439	Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
				440	diag::err_exponent_has_no_digits);
				441	return;
				442	}
				443	}
				444	}
				445
				446
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	447	/// GetIntegerValue - Convert this numeric literal value to an APInt that
				448	/// matches Val's input width. If there is an overflow, set Val to the low bits
				449	/// of the result and return true. Otherwise, return false.
				450	bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
				451	Val = 0;
				452	s = DigitsBegin;
				453
				454	llvm::APInt RadixVal(Val.getBitWidth(), radix);
				455	llvm::APInt CharVal(Val.getBitWidth(), 0);
				456	llvm::APInt OldVal = Val;
				457
				458	bool OverflowOccurred = false;
				459	while (s < SuffixBegin) {
				460	unsigned C = HexDigitValue(*s++);
				461
				462	// If this letter is out of bound for this radix, reject it.
				463	assert(C < radix && "NumericLiteralParser ctor should have rejected this");
				464
				465	CharVal = C;
				466
				467	// Add the digit to the value in the appropriate radix. If adding in digits
				468	// made the value smaller, then this overflowed.
				469	OldVal = Val;
				470
				471	// Multiply by radix, did overflow occur on the multiply?
				472	Val *= RadixVal;
				473	OverflowOccurred \|= Val.udiv(RadixVal) != OldVal;
				474
				475	OldVal = Val;
				476	// Add value, did overflow occur on the value?
				477	Val += CharVal;
				478	OverflowOccurred \|= Val.ult(OldVal);
				479	OverflowOccurred \|= Val.ult(CharVal);
				480	}
				481	return OverflowOccurred;
				482	}
				483
Chris Lattner	525a050	2007-09-22 18:29:59 +0000	[diff] [blame]	484	llvm::APFloat NumericLiteralParser::
Ted Kremenek	427d5af	2007-11-26 23:12:30 +0000	[diff] [blame]	485	GetFloatValue(const llvm::fltSemantics &Format, bool* isExact) {
				486	using llvm::APFloat;
				487
Ted Kremenek	32e61bf	2007-11-29 00:54:29 +0000	[diff] [blame]	488	llvm::SmallVector<char,256> floatChars;
				489	for (unsigned i = 0, n = ThisTokEnd-ThisTokBegin; i != n; ++i)
				490	floatChars.push_back(ThisTokBegin[i]);
				491
				492	floatChars.push_back('\0');
				493
Ted Kremenek	427d5af	2007-11-26 23:12:30 +0000	[diff] [blame]	494	APFloat V (Format, APFloat::fcZero, false);
Ted Kremenek	427d5af	2007-11-26 23:12:30 +0000	[diff] [blame]	495	APFloat::opStatus status;
Ted Kremenek	32e61bf	2007-11-29 00:54:29 +0000	[diff] [blame]	496
				497	status = V.convertFromString(&floatChars[0],APFloat::rmNearestTiesToEven);
Ted Kremenek	427d5af	2007-11-26 23:12:30 +0000	[diff] [blame]	498
				499	if (isExact)
				500	*isExact = status == APFloat::opOK;
				501
				502	return V;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	503	}
				504
				505	void NumericLiteralParser::Diag(SourceLocation Loc, unsigned DiagID,
				506	const std::string &M) {
				507	PP.Diag(Loc, DiagID, M);
				508	hadError = true;
				509	}
				510
				511
				512	CharLiteralParser::CharLiteralParser(const char begin, const char end,
				513	SourceLocation Loc, Preprocessor &PP) {
				514	// At this point we know that the character matches the regex "L?'.*'".
				515	HadError = false;
				516	Value = 0;
				517
				518	// Determine if this is a wide character.
				519	IsWide = begin[0] == 'L';
				520	if (IsWide) ++begin;
				521
				522	// Skip over the entry quote.
				523	assert(begin[0] == '\'' && "Invalid token lexed");
				524	++begin;
				525
				526	// FIXME: This assumes that 'int' is 32-bits in overflow calculation, and the
				527	// size of "value".
Chris Lattner	98be494	2008-03-05 18:54:05 +0000	[diff] [blame]	528	assert(PP.getTargetInfo().getIntWidth() == 32 &&
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	529	"Assumes sizeof(int) == 4 for now");
				530	// FIXME: This assumes that wchar_t is 32-bits for now.
Chris Lattner	98be494	2008-03-05 18:54:05 +0000	[diff] [blame]	531	assert(PP.getTargetInfo().getWCharWidth() == 32 &&
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	532	"Assumes sizeof(wchar_t) == 4 for now");
				533	// FIXME: This extensively assumes that 'char' is 8-bits.
Chris Lattner	98be494	2008-03-05 18:54:05 +0000	[diff] [blame]	534	assert(PP.getTargetInfo().getCharWidth() == 8 &&
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	535	"Assumes char is 8 bits");
				536
				537	bool isFirstChar = true;
				538	bool isMultiChar = false;
				539	while (begin[0] != '\'') {
				540	unsigned ResultChar;
				541	if (begin[0] != '\\') // If this is a normal character, consume it.
				542	ResultChar = *begin++;
				543	else // Otherwise, this is an escape character.
				544	ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP);
				545
				546	// If this is a multi-character constant (e.g. 'abc'), handle it. These are
				547	// implementation defined (C99 6.4.4.4p10).
				548	if (!isFirstChar) {
				549	// If this is the second character being processed, do special handling.
				550	if (!isMultiChar) {
				551	isMultiChar = true;
				552
				553	// Warn about discarding the top bits for multi-char wide-character
				554	// constants (L'abcd').
				555	if (IsWide)
				556	PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);
				557	}
				558
				559	if (IsWide) {
				560	// Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
				561	Value = 0;
				562	} else {
				563	// Narrow character literals act as though their value is concatenated
				564	// in this implementation.
				565	if (((Value << 8) >> 8) != Value)
				566	PP.Diag(Loc, diag::warn_char_constant_too_large);
				567	Value <<= 8;
				568	}
				569	}
				570
				571	Value += ResultChar;
				572	isFirstChar = false;
				573	}
				574
				575	// If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
				576	// if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
				577	// character constants are not sign extended in the this implementation:
				578	// '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
				579	if (!IsWide && !isMultiChar && (Value & 128) &&
Chris Lattner	98be494	2008-03-05 18:54:05 +0000	[diff] [blame]	580	PP.getTargetInfo().isCharSigned())
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	581	Value = (signed char)Value;
				582	}
				583
				584
				585	/// string-literal: [C99 6.4.5]
				586	/// " [s-char-sequence] "
				587	/// L" [s-char-sequence] "
				588	/// s-char-sequence:
				589	/// s-char
				590	/// s-char-sequence s-char
				591	/// s-char:
				592	/// any source character except the double quote ",
				593	/// backslash \, or newline character
				594	/// escape-character
				595	/// universal-character-name
				596	/// escape-character: [C99 6.4.4.4]
				597	/// \ escape-code
				598	/// universal-character-name
				599	/// escape-code:
				600	/// character-escape-code
				601	/// octal-escape-code
				602	/// hex-escape-code
				603	/// character-escape-code: one of
				604	/// n t b r f v a
				605	/// \ ' " ?
				606	/// octal-escape-code:
				607	/// octal-digit
				608	/// octal-digit octal-digit
				609	/// octal-digit octal-digit octal-digit
				610	/// hex-escape-code:
				611	/// x hex-digit
				612	/// hex-escape-code hex-digit
				613	/// universal-character-name:
				614	/// \u hex-quad
				615	/// \U hex-quad hex-quad
				616	/// hex-quad:
				617	/// hex-digit hex-digit hex-digit hex-digit
				618	///
				619	StringLiteralParser::
Chris Lattner	d217773	2007-07-20 16:59:19 +0000	[diff] [blame]	620	StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	621	Preprocessor &pp, TargetInfo &t)
				622	: PP(pp), Target(t) {
				623	// Scan all of the string portions, remember the max individual token length,
				624	// computing a bound on the concatenated string length, and see whether any
				625	// piece is a wide-string. If any of the string portions is a wide-string
				626	// literal, the result is a wide-string literal [C99 6.4.5p4].
				627	MaxTokenLength = StringToks[0].getLength();
				628	SizeBound = StringToks[0].getLength()-2; // -2 for "".
Chris Lattner	22f6bbc	2007-10-09 18:02:16 +0000	[diff] [blame]	629	AnyWide = StringToks[0].is(tok::wide_string_literal);
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	630
				631	hadError = false;
				632
				633	// Implement Translation Phase #6: concatenation of string literals
				634	/// (C99 5.1.1.2p1). The common case is only one string fragment.
				635	for (unsigned i = 1; i != NumStringToks; ++i) {
				636	// The string could be shorter than this if it needs cleaning, but this is a
				637	// reasonable bound, which is all we need.
				638	SizeBound += StringToks[i].getLength()-2; // -2 for "".
				639
				640	// Remember maximum string piece length.
				641	if (StringToks[i].getLength() > MaxTokenLength)
				642	MaxTokenLength = StringToks[i].getLength();
				643
				644	// Remember if we see any wide strings.
Chris Lattner	22f6bbc	2007-10-09 18:02:16 +0000	[diff] [blame]	645	AnyWide \|= StringToks[i].is(tok::wide_string_literal);
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	646	}
				647
				648
				649	// Include space for the null terminator.
				650	++SizeBound;
				651
				652	// TODO: K&R warning: "traditional C rejects string constant concatenation"
				653
				654	// Get the width in bytes of wchar_t. If no wchar_t strings are used, do not
				655	// query the target. As such, wchar_tByteWidth is only valid if AnyWide=true.
				656	wchar_tByteWidth = ~0U;
				657	if (AnyWide) {
Chris Lattner	98be494	2008-03-05 18:54:05 +0000	[diff] [blame]	658	wchar_tByteWidth = Target.getWCharWidth();
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	659	assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
				660	wchar_tByteWidth /= 8;
				661	}
				662
				663	// The output buffer size needs to be large enough to hold wide characters.
				664	// This is a worst-case assumption which basically corresponds to L"" "long".
				665	if (AnyWide)
				666	SizeBound *= wchar_tByteWidth;
				667
				668	// Size the temporary buffer to hold the result string data.
				669	ResultBuf.resize(SizeBound);
				670
				671	// Likewise, but for each string piece.
				672	llvm::SmallString<512> TokenBuf;
				673	TokenBuf.resize(MaxTokenLength);
				674
				675	// Loop over all the strings, getting their spelling, and expanding them to
				676	// wide strings as appropriate.
				677	ResultPtr = &ResultBuf[0]; // Next byte to fill in.
				678
Anders Carlsson	ee98ac5	2007-10-15 02:50:23 +0000	[diff] [blame]	679	Pascal = false;
				680
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	681	for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
				682	const char *ThisTokBuf = &TokenBuf[0];
				683	// Get the spelling of the token, which eliminates trigraphs, etc. We know
				684	// that ThisTokBuf points to a buffer that is big enough for the whole token
				685	// and 'spelled' tokens can only shrink.
				686	unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
				687	const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
				688
				689	// TODO: Input character set mapping support.
				690
				691	// Skip L marker for wide strings.
				692	bool ThisIsWide = false;
				693	if (ThisTokBuf[0] == 'L') {
				694	++ThisTokBuf;
				695	ThisIsWide = true;
				696	}
				697
				698	assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
				699	++ThisTokBuf;
				700
Anders Carlsson	ee98ac5	2007-10-15 02:50:23 +0000	[diff] [blame]	701	// Check if this is a pascal string
				702	if (pp.getLangOptions().PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
				703	ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
				704
				705	// If the \p sequence is found in the first token, we have a pascal string
				706	// Otherwise, if we already have a pascal string, ignore the first \p
				707	if (i == 0) {
				708	++ThisTokBuf;
				709	Pascal = true;
				710	} else if (Pascal)
				711	ThisTokBuf += 2;
				712	}
				713
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	714	while (ThisTokBuf != ThisTokEnd) {
				715	// Is this a span of non-escape characters?
				716	if (ThisTokBuf[0] != '\\') {
				717	const char *InStart = ThisTokBuf;
				718	do {
				719	++ThisTokBuf;
				720	} while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
				721
				722	// Copy the character span over.
				723	unsigned Len = ThisTokBuf-InStart;
				724	if (!AnyWide) {
				725	memcpy(ResultPtr, InStart, Len);
				726	ResultPtr += Len;
				727	} else {
				728	// Note: our internal rep of wide char tokens is always little-endian.
				729	for (; Len; --Len, ++InStart) {
				730	*ResultPtr++ = InStart[0];
				731	// Add zeros at the end.
				732	for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
				733	*ResultPtr++ = 0;
				734	}
				735	}
				736	continue;
				737	}
				738
				739	// Otherwise, this is an escape character. Process it.
				740	unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
				741	StringToks[i].getLocation(),
				742	ThisIsWide, PP);
				743
				744	// Note: our internal rep of wide char tokens is always little-endian.
				745	*ResultPtr++ = ResultChar & 0xFF;
				746
				747	if (AnyWide) {
				748	for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
				749	ResultPtr++ = ResultChar >> i8;
				750	}
				751	}
				752	}
				753
				754	// Add zero terminator.
				755	*ResultPtr = 0;
				756	if (AnyWide) {
				757	for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
				758	*ResultPtr++ = 0;
				759	}
Anders Carlsson	ee98ac5	2007-10-15 02:50:23 +0000	[diff] [blame]	760
				761	if (Pascal)
				762	ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	763	}