Blame - clang/lib/Lex/LiteralSupport.cpp - toolchain/llvm-project

blob: 23bbacebc646c9e08d294c91b7eaa8654ea91539 [file] [log] [blame]

Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	1	//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	2	//
				3	// The LLVM Compiler Infrastructure
				4	//
Chris Lattner	5b12ab8	2007-12-29 19:59:25 +0000	[diff] [blame]	5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	7	//
				8	//===----------------------------------------------------------------------===//
				9	//
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	10	// This file implements the NumericLiteralParser, CharLiteralParser, and
				11	// StringLiteralParser interfaces.
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "clang/Lex/LiteralSupport.h"
Jordan Rose	a7d0384	2013-02-08 22:30:41 +0000	[diff] [blame]	16	#include "clang/Basic/CharInfo.h"
Chandler Carruth	3a02247	2012-12-04 09:13:33 +0000	[diff] [blame]	17	#include "clang/Basic/TargetInfo.h"
				18	#include "clang/Lex/LexDiagnostic.h"
				19	#include "clang/Lex/Preprocessor.h"
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	20	#include "llvm/ADT/StringExtras.h"
Mehdi Amini	9670f84	2016-07-18 19:02:11 +0000	[diff] [blame^]	21	#include "llvm/ADT/StringSwitch.h"
Dmitri Gribenko	9feeef4	2013-01-30 12:06:08 +0000	[diff] [blame]	22	#include "llvm/Support/ConvertUTF.h"
David Blaikie	76bd3c8	2011-09-23 05:35:21 +0000	[diff] [blame]	23	#include "llvm/Support/ErrorHandling.h"
Dmitri Gribenko	9feeef4	2013-01-30 12:06:08 +0000	[diff] [blame]	24
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	25	using namespace clang;
				26
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	27	static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
				28	switch (kind) {
David Blaikie	83d382b	2011-09-23 05:06:16 +0000	[diff] [blame]	29	default: llvm_unreachable("Unknown token type!");
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	30	case tok::char_constant:
				31	case tok::string_literal:
Richard Smith	3e3a705	2014-11-08 06:08:42 +0000	[diff] [blame]	32	case tok::utf8_char_constant:
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	33	case tok::utf8_string_literal:
				34	return Target.getCharWidth();
				35	case tok::wide_char_constant:
				36	case tok::wide_string_literal:
				37	return Target.getWCharWidth();
				38	case tok::utf16_char_constant:
				39	case tok::utf16_string_literal:
				40	return Target.getChar16Width();
				41	case tok::utf32_char_constant:
				42	case tok::utf32_string_literal:
				43	return Target.getChar32Width();
				44	}
				45	}
				46
Seth Cantrell	4cfc817	2012-10-28 18:24:46 +0000	[diff] [blame]	47	static CharSourceRange MakeCharSourceRange(const LangOptions &Features,
				48	FullSourceLoc TokLoc,
				49	const char *TokBegin,
				50	const char *TokRangeBegin,
				51	const char *TokRangeEnd) {
				52	SourceLocation Begin =
				53	Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
				54	TokLoc.getManager(), Features);
				55	SourceLocation End =
				56	Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin,
				57	TokLoc.getManager(), Features);
				58	return CharSourceRange::getCharRange(Begin, End);
				59	}
				60
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	61	/// \brief Produce a diagnostic highlighting some portion of a literal.
				62	///
				63	/// Emits the diagnostic \p DiagID, highlighting the range of characters from
				64	/// \p TokRangeBegin (inclusive) to \p TokRangeEnd (exclusive), which must be
				65	/// a substring of a spelling buffer for the token beginning at \p TokBegin.
				66	static DiagnosticBuilder Diag(DiagnosticsEngine *Diags,
				67	const LangOptions &Features, FullSourceLoc TokLoc,
				68	const char TokBegin, const char TokRangeBegin,
				69	const char *TokRangeEnd, unsigned DiagID) {
				70	SourceLocation Begin =
				71	Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
				72	TokLoc.getManager(), Features);
Seth Cantrell	4cfc817	2012-10-28 18:24:46 +0000	[diff] [blame]	73	return Diags->Report(Begin, DiagID) <<
				74	MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	75	}
				76
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	77	/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
				78	/// either a character or a string literal.
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	79	static unsigned ProcessCharEscape(const char *ThisTokBegin,
				80	const char *&ThisTokBuf,
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	81	const char *ThisTokEnd, bool &HadError,
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	82	FullSourceLoc Loc, unsigned CharWidth,
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	83	DiagnosticsEngine *Diags,
				84	const LangOptions &Features) {
				85	const char *EscapeBegin = ThisTokBuf;
				86
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	87	// Skip the '\' char.
				88	++ThisTokBuf;
				89
				90	// We know that this character can't be off the end of the buffer, because
				91	// that would have been \", which would not have been the end of string.
				92	unsigned ResultChar = *ThisTokBuf++;
				93	switch (ResultChar) {
				94	// These map to themselves.
				95	case '\\': case '\'': case '"': case '?': break;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	96
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	97	// These have fixed mappings.
				98	case 'a':
				99	// TODO: K&R: the meaning of '\\a' is different in traditional C
				100	ResultChar = 7;
				101	break;
				102	case 'b':
				103	ResultChar = 8;
				104	break;
				105	case 'e':
Chris Lattner	7a02bfd	2010-11-17 06:26:08 +0000	[diff] [blame]	106	if (Diags)
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	107	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
				108	diag::ext_nonstandard_escape) << "e";
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	109	ResultChar = 27;
				110	break;
Eli Friedman	28a00aa	2009-06-10 01:32:39 +0000	[diff] [blame]	111	case 'E':
Chris Lattner	7a02bfd	2010-11-17 06:26:08 +0000	[diff] [blame]	112	if (Diags)
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	113	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
				114	diag::ext_nonstandard_escape) << "E";
Eli Friedman	28a00aa	2009-06-10 01:32:39 +0000	[diff] [blame]	115	ResultChar = 27;
				116	break;
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	117	case 'f':
				118	ResultChar = 12;
				119	break;
				120	case 'n':
				121	ResultChar = 10;
				122	break;
				123	case 'r':
				124	ResultChar = 13;
				125	break;
				126	case 't':
				127	ResultChar = 9;
				128	break;
				129	case 'v':
				130	ResultChar = 11;
				131	break;
Chris Lattner	c10adde	2007-05-20 05:00:58 +0000	[diff] [blame]	132	case 'x': { // Hex escape.
				133	ResultChar = 0;
Jordan Rose	a7d0384	2013-02-08 22:30:41 +0000	[diff] [blame]	134	if (ThisTokBuf == ThisTokEnd \|\| !isHexDigit(*ThisTokBuf)) {
Chris Lattner	7a02bfd	2010-11-17 06:26:08 +0000	[diff] [blame]	135	if (Diags)
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	136	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
Jordan Rose	aa89cf1	2013-01-24 20:50:13 +0000	[diff] [blame]	137	diag::err_hex_escape_no_digits) << "x";
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	138	HadError = 1;
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	139	break;
				140	}
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	141
Chris Lattner	812eda8	2007-05-20 05:17:04 +0000	[diff] [blame]	142	// Hex escapes are a maximal series of hex digits.
Chris Lattner	c10adde	2007-05-20 05:00:58 +0000	[diff] [blame]	143	bool Overflow = false;
				144	for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
Jordan Rose	78ed86a	2013-01-18 22:33:58 +0000	[diff] [blame]	145	int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
Chris Lattner	c10adde	2007-05-20 05:00:58 +0000	[diff] [blame]	146	if (CharVal == -1) break;
Chris Lattner	59f09b6	2008-09-30 20:45:40 +0000	[diff] [blame]	147	// About to shift out a digit?
David Blaikie	96cedb5	2015-03-23 19:54:44 +0000	[diff] [blame]	148	if (ResultChar & 0xF0000000)
				149	Overflow = true;
Chris Lattner	c10adde	2007-05-20 05:00:58 +0000	[diff] [blame]	150	ResultChar <<= 4;
				151	ResultChar \|= CharVal;
				152	}
				153
				154	// See if any bits will be truncated when evaluated as a character.
Chris Lattner	c10adde	2007-05-20 05:00:58 +0000	[diff] [blame]	155	if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
				156	Overflow = true;
				157	ResultChar &= ~0U >> (32-CharWidth);
				158	}
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	159
Chris Lattner	c10adde	2007-05-20 05:00:58 +0000	[diff] [blame]	160	// Check for overflow.
Chris Lattner	7a02bfd	2010-11-17 06:26:08 +0000	[diff] [blame]	161	if (Overflow && Diags) // Too many digits to fit in
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	162	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
Craig Topper	7f5ff21	2015-11-14 02:09:55 +0000	[diff] [blame]	163	diag::err_escape_too_large) << 0;
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	164	break;
Chris Lattner	c10adde	2007-05-20 05:00:58 +0000	[diff] [blame]	165	}
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	166	case '0': case '1': case '2': case '3':
Chris Lattner	812eda8	2007-05-20 05:17:04 +0000	[diff] [blame]	167	case '4': case '5': case '6': case '7': {
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	168	// Octal escapes.
Chris Lattner	3f4b6e3	2007-06-09 06:20:47 +0000	[diff] [blame]	169	--ThisTokBuf;
Chris Lattner	812eda8	2007-05-20 05:17:04 +0000	[diff] [blame]	170	ResultChar = 0;
				171
				172	// Octal escapes are a series of octal digits with maximum length 3.
				173	// "\0123" is a two digit sequence equal to "\012" "3".
				174	unsigned NumDigits = 0;
				175	do {
				176	ResultChar <<= 3;
				177	ResultChar \|= *ThisTokBuf++ - '0';
				178	++NumDigits;
				179	} while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
				180	ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	181
Chris Lattner	812eda8	2007-05-20 05:17:04 +0000	[diff] [blame]	182	// Check for overflow. Reject '\777', but not L'\777'.
Chris Lattner	812eda8	2007-05-20 05:17:04 +0000	[diff] [blame]	183	if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
Chris Lattner	7a02bfd	2010-11-17 06:26:08 +0000	[diff] [blame]	184	if (Diags)
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	185	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
Craig Topper	7f5ff21	2015-11-14 02:09:55 +0000	[diff] [blame]	186	diag::err_escape_too_large) << 1;
Chris Lattner	812eda8	2007-05-20 05:17:04 +0000	[diff] [blame]	187	ResultChar &= ~0U >> (32-CharWidth);
				188	}
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	189	break;
Chris Lattner	812eda8	2007-05-20 05:17:04 +0000	[diff] [blame]	190	}
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	191
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	192	// Otherwise, these are not valid escapes.
				193	case '(': case '{': case '[': case '%':
				194	// GCC accepts these as extensions. We warn about them as such though.
Chris Lattner	7a02bfd	2010-11-17 06:26:08 +0000	[diff] [blame]	195	if (Diags)
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	196	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
				197	diag::ext_nonstandard_escape)
				198	<< std::string(1, ResultChar);
Eli Friedman	5d72d41	2009-04-28 00:51:18 +0000	[diff] [blame]	199	break;
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	200	default:
Craig Topper	d2d442c	2014-05-17 23:10:59 +0000	[diff] [blame]	201	if (!Diags)
Douglas Gregor	9af0302	2010-05-26 05:35:51 +0000	[diff] [blame]	202	break;
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	203
Jordan Rose	a7d0384	2013-02-08 22:30:41 +0000	[diff] [blame]	204	if (isPrintable(ResultChar))
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	205	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
				206	diag::ext_unknown_escape)
				207	<< std::string(1, ResultChar);
Chris Lattner	59acca5	2008-11-22 07:23:31 +0000	[diff] [blame]	208	else
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	209	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
				210	diag::ext_unknown_escape)
				211	<< "x" + llvm::utohexstr(ResultChar);
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	212	break;
				213	}
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	214
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	215	return ResultChar;
				216	}
				217
Richard Smith	8b7258b	2014-02-17 21:52:30 +0000	[diff] [blame]	218	static void appendCodePoint(unsigned Codepoint,
				219	llvm::SmallVectorImpl<char> &Str) {
				220	char ResultBuf[4];
				221	char *ResultPtr = ResultBuf;
				222	bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr);
				223	(void)Res;
				224	assert(Res && "Unexpected conversion failure");
				225	Str.append(ResultBuf, ResultPtr);
				226	}
				227
				228	void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
				229	for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
				230	if (*I != '\\') {
				231	Buf.push_back(*I);
				232	continue;
				233	}
				234
				235	++I;
				236	assert(I == 'u' \|\| I == 'U');
				237
				238	unsigned NumHexDigits;
				239	if (*I == 'u')
				240	NumHexDigits = 4;
				241	else
				242	NumHexDigits = 8;
				243
				244	assert(I + NumHexDigits <= E);
				245
				246	uint32_t CodePoint = 0;
				247	for (++I; NumHexDigits != 0; ++I, --NumHexDigits) {
				248	unsigned Value = llvm::hexDigitValue(*I);
				249	assert(Value != -1U);
				250
				251	CodePoint <<= 4;
				252	CodePoint += Value;
				253	}
				254
				255	appendCodePoint(CodePoint, Buf);
				256	--I;
				257	}
				258	}
				259
Steve Naroff	7b753d2	2009-03-30 23:46:03 +0000	[diff] [blame]	260	/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
Nico Weber	a6bde81	2010-10-09 00:27:47 +0000	[diff] [blame]	261	/// return the UTF32.
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	262	static bool ProcessUCNEscape(const char ThisTokBegin, const char &ThisTokBuf,
				263	const char *ThisTokEnd,
Nico Weber	a6bde81	2010-10-09 00:27:47 +0000	[diff] [blame]	264	uint32_t &UcnVal, unsigned short &UcnLen,
David Blaikie	9c902b5	2011-09-25 23:23:43 +0000	[diff] [blame]	265	FullSourceLoc Loc, DiagnosticsEngine *Diags,
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	266	const LangOptions &Features,
				267	bool in_char_string_literal = false) {
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	268	const char *UcnBegin = ThisTokBuf;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	269
Steve Naroff	7b753d2	2009-03-30 23:46:03 +0000	[diff] [blame]	270	// Skip the '\u' char's.
				271	ThisTokBuf += 2;
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	272
Jordan Rose	a7d0384	2013-02-08 22:30:41 +0000	[diff] [blame]	273	if (ThisTokBuf == ThisTokEnd \|\| !isHexDigit(*ThisTokBuf)) {
Chris Lattner	bde1b81	2010-11-17 06:46:14 +0000	[diff] [blame]	274	if (Diags)
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	275	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
Jordan Rose	aa89cf1	2013-01-24 20:50:13 +0000	[diff] [blame]	276	diag::err_hex_escape_no_digits) << StringRef(&ThisTokBuf[-1], 1);
Nico Weber	a6bde81	2010-10-09 00:27:47 +0000	[diff] [blame]	277	return false;
Steve Naroff	7b753d2	2009-03-30 23:46:03 +0000	[diff] [blame]	278	}
Nico Weber	a6bde81	2010-10-09 00:27:47 +0000	[diff] [blame]	279	UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
Fariborz Jahanian	abaae2b	2010-08-31 23:34:27 +0000	[diff] [blame]	280	unsigned short UcnLenSave = UcnLen;
Nico Weber	a6bde81	2010-10-09 00:27:47 +0000	[diff] [blame]	281	for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) {
Jordan Rose	78ed86a	2013-01-18 22:33:58 +0000	[diff] [blame]	282	int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
Steve Naroff	7b753d2	2009-03-30 23:46:03 +0000	[diff] [blame]	283	if (CharVal == -1) break;
				284	UcnVal <<= 4;
				285	UcnVal \|= CharVal;
				286	}
				287	// If we didn't consume the proper number of digits, there is a problem.
Nico Weber	a6bde81	2010-10-09 00:27:47 +0000	[diff] [blame]	288	if (UcnLenSave) {
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	289	if (Diags)
				290	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
				291	diag::err_ucn_escape_incomplete);
Nico Weber	a6bde81	2010-10-09 00:27:47 +0000	[diff] [blame]	292	return false;
Steve Naroff	7b753d2	2009-03-30 23:46:03 +0000	[diff] [blame]	293	}
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	294
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	295	// Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	296	if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) \|\| // surrogate codepoints
				297	UcnVal > 0x10FFFF) { // maximum legal UTF32 value
Chris Lattner	bde1b81	2010-11-17 06:46:14 +0000	[diff] [blame]	298	if (Diags)
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	299	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
				300	diag::err_ucn_escape_invalid);
Nico Weber	a6bde81	2010-10-09 00:27:47 +0000	[diff] [blame]	301	return false;
				302	}
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	303
				304	// C++11 allows UCNs that refer to control characters and basic source
				305	// characters inside character and string literals
				306	if (UcnVal < 0xa0 &&
				307	(UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) { // $, @, `
Richard Smith	2bf7fdb	2013-01-02 11:42:31 +0000	[diff] [blame]	308	bool IsError = (!Features.CPlusPlus11 \|\| !in_char_string_literal);
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	309	if (Diags) {
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	310	char BasicSCSChar = UcnVal;
				311	if (UcnVal >= 0x20 && UcnVal < 0x7f)
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	312	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
				313	IsError ? diag::err_ucn_escape_basic_scs :
				314	diag::warn_cxx98_compat_literal_ucn_escape_basic_scs)
				315	<< StringRef(&BasicSCSChar, 1);
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	316	else
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	317	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
				318	IsError ? diag::err_ucn_control_character :
				319	diag::warn_cxx98_compat_literal_ucn_control_character);
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	320	}
				321	if (IsError)
				322	return false;
				323	}
				324
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	325	if (!Features.CPlusPlus && !Features.C99 && Diags)
				326	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
Jordan Rose	c0cba27	2013-01-27 20:12:04 +0000	[diff] [blame]	327	diag::warn_ucn_not_valid_in_c89_literal);
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	328
Nico Weber	a6bde81	2010-10-09 00:27:47 +0000	[diff] [blame]	329	return true;
				330	}
				331
Richard Smith	4060f77	2012-06-13 05:37:23 +0000	[diff] [blame]	332	/// MeasureUCNEscape - Determine the number of bytes within the resulting string
				333	/// which this UCN will occupy.
				334	static int MeasureUCNEscape(const char ThisTokBegin, const char &ThisTokBuf,
				335	const char *ThisTokEnd, unsigned CharByteWidth,
				336	const LangOptions &Features, bool &HadError) {
				337	// UTF-32: 4 bytes per escape.
				338	if (CharByteWidth == 4)
				339	return 4;
				340
				341	uint32_t UcnVal = 0;
				342	unsigned short UcnLen = 0;
				343	FullSourceLoc Loc;
				344
				345	if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
Craig Topper	d2d442c	2014-05-17 23:10:59 +0000	[diff] [blame]	346	UcnLen, Loc, nullptr, Features, true)) {
Richard Smith	4060f77	2012-06-13 05:37:23 +0000	[diff] [blame]	347	HadError = true;
				348	return 0;
				349	}
				350
				351	// UTF-16: 2 bytes for BMP, 4 bytes otherwise.
				352	if (CharByteWidth == 2)
				353	return UcnVal <= 0xFFFF ? 2 : 4;
				354
				355	// UTF-8.
				356	if (UcnVal < 0x80)
				357	return 1;
				358	if (UcnVal < 0x800)
				359	return 2;
				360	if (UcnVal < 0x10000)
				361	return 3;
				362	return 4;
				363	}
				364
Nico Weber	a6bde81	2010-10-09 00:27:47 +0000	[diff] [blame]	365	/// EncodeUCNEscape - Read the Universal Character Name, check constraints and
				366	/// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
				367	/// StringLiteralParser. When we decide to implement UCN's for identifiers,
				368	/// we will likely rework our support for UCN's.
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	369	static void EncodeUCNEscape(const char ThisTokBegin, const char &ThisTokBuf,
				370	const char *ThisTokEnd,
Chris Lattner	2be8aa9	2010-11-17 07:12:42 +0000	[diff] [blame]	371	char *&ResultBuf, bool &HadError,
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	372	FullSourceLoc Loc, unsigned CharByteWidth,
David Blaikie	9c902b5	2011-09-25 23:23:43 +0000	[diff] [blame]	373	DiagnosticsEngine *Diags,
				374	const LangOptions &Features) {
Nico Weber	a6bde81	2010-10-09 00:27:47 +0000	[diff] [blame]	375	typedef uint32_t UTF32;
				376	UTF32 UcnVal = 0;
				377	unsigned short UcnLen = 0;
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	378	if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen,
				379	Loc, Diags, Features, true)) {
Richard Smith	4060f77	2012-06-13 05:37:23 +0000	[diff] [blame]	380	HadError = true;
Steve Naroff	7b753d2	2009-03-30 23:46:03 +0000	[diff] [blame]	381	return;
				382	}
Nico Weber	a6bde81	2010-10-09 00:27:47 +0000	[diff] [blame]	383
Eli Friedman	f9edb00	2013-09-18 23:23:13 +0000	[diff] [blame]	384	assert((CharByteWidth == 1 \|\| CharByteWidth == 2 \|\| CharByteWidth == 4) &&
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	385	"only character widths of 1, 2, or 4 bytes supported");
Nico Weber	9762e0a	2010-10-06 04:57:26 +0000	[diff] [blame]	386
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	387	(void)UcnLen;
				388	assert((UcnLen== 4 \|\| UcnLen== 8) && "only ucn length of 4 or 8 supported");
Nico Weber	9762e0a	2010-10-06 04:57:26 +0000	[diff] [blame]	389
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	390	if (CharByteWidth == 4) {
Eli Friedman	d137079	2011-11-02 23:06:23 +0000	[diff] [blame]	391	// FIXME: Make the type of the result buffer correct instead of
				392	// using reinterpret_cast.
				393	UTF32 ResultPtr = reinterpret_cast<UTF32>(ResultBuf);
				394	*ResultPtr = UcnVal;
				395	ResultBuf += 4;
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	396	return;
				397	}
				398
				399	if (CharByteWidth == 2) {
Eli Friedman	d137079	2011-11-02 23:06:23 +0000	[diff] [blame]	400	// FIXME: Make the type of the result buffer correct instead of
				401	// using reinterpret_cast.
				402	UTF16 ResultPtr = reinterpret_cast<UTF16>(ResultBuf);
				403
Richard Smith	0948d93	2012-06-13 05:41:29 +0000	[diff] [blame]	404	if (UcnVal <= (UTF32)0xFFFF) {
Eli Friedman	d137079	2011-11-02 23:06:23 +0000	[diff] [blame]	405	*ResultPtr = UcnVal;
				406	ResultBuf += 2;
Nico Weber	9762e0a	2010-10-06 04:57:26 +0000	[diff] [blame]	407	return;
				408	}
Nico Weber	9762e0a	2010-10-06 04:57:26 +0000	[diff] [blame]	409
Eli Friedman	d137079	2011-11-02 23:06:23 +0000	[diff] [blame]	410	// Convert to UTF16.
Nico Weber	9762e0a	2010-10-06 04:57:26 +0000	[diff] [blame]	411	UcnVal -= 0x10000;
Eli Friedman	d137079	2011-11-02 23:06:23 +0000	[diff] [blame]	412	*ResultPtr = 0xD800 + (UcnVal >> 10);
				413	*(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF);
				414	ResultBuf += 4;
Fariborz Jahanian	abaae2b	2010-08-31 23:34:27 +0000	[diff] [blame]	415	return;
				416	}
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	417
				418	assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
				419
Steve Naroff	7b753d2	2009-03-30 23:46:03 +0000	[diff] [blame]	420	// Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
				421	// The conversion below was inspired by:
				422	// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	423	// First, we determine how many bytes the result will require.
Steve Naroff	c94adda	2009-04-01 11:09:15 +0000	[diff] [blame]	424	typedef uint8_t UTF8;
Steve Naroff	7b753d2	2009-03-30 23:46:03 +0000	[diff] [blame]	425
				426	unsigned short bytesToWrite = 0;
				427	if (UcnVal < (UTF32)0x80)
				428	bytesToWrite = 1;
				429	else if (UcnVal < (UTF32)0x800)
				430	bytesToWrite = 2;
				431	else if (UcnVal < (UTF32)0x10000)
				432	bytesToWrite = 3;
				433	else
				434	bytesToWrite = 4;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	435
Steve Naroff	7b753d2	2009-03-30 23:46:03 +0000	[diff] [blame]	436	const unsigned byteMask = 0xBF;
				437	const unsigned byteMark = 0x80;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	438
Steve Naroff	7b753d2	2009-03-30 23:46:03 +0000	[diff] [blame]	439	// Once the bits are split out into bytes of UTF8, this is a mask OR-ed
Steve Naroff	f2a880c	2009-03-31 10:29:45 +0000	[diff] [blame]	440	// into the first byte, depending on how many bytes follow.
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	441	static const UTF8 firstByteMark[5] = {
Steve Naroff	f2a880c	2009-03-31 10:29:45 +0000	[diff] [blame]	442	0x00, 0x00, 0xC0, 0xE0, 0xF0
Steve Naroff	7b753d2	2009-03-30 23:46:03 +0000	[diff] [blame]	443	};
				444	// Finally, we write the bytes into ResultBuf.
				445	ResultBuf += bytesToWrite;
				446	switch (bytesToWrite) { // note: everything falls through.
Benjamin Kramer	f23a6e6	2012-11-08 19:22:26 +0000	[diff] [blame]	447	case 4: *--ResultBuf = (UTF8)((UcnVal \| byteMark) & byteMask); UcnVal >>= 6;
				448	case 3: *--ResultBuf = (UTF8)((UcnVal \| byteMark) & byteMask); UcnVal >>= 6;
				449	case 2: *--ResultBuf = (UTF8)((UcnVal \| byteMark) & byteMask); UcnVal >>= 6;
				450	case 1: *--ResultBuf = (UTF8) (UcnVal \| firstByteMark[bytesToWrite]);
Steve Naroff	7b753d2	2009-03-30 23:46:03 +0000	[diff] [blame]	451	}
				452	// Update the buffer.
				453	ResultBuf += bytesToWrite;
				454	}
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	455
				456
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	457	/// integer-constant: [C99 6.4.4.1]
				458	/// decimal-constant integer-suffix
				459	/// octal-constant integer-suffix
				460	/// hexadecimal-constant integer-suffix
Richard Smith	f4198b7	2013-07-23 08:14:48 +0000	[diff] [blame]	461	/// binary-literal integer-suffix [GNU, C++1y]
Richard Smith	8129245	2012-03-08 21:59:28 +0000	[diff] [blame]	462	/// user-defined-integer-literal: [C++11 lex.ext]
Richard Smith	39570d00	2012-03-08 08:45:32 +0000	[diff] [blame]	463	/// decimal-literal ud-suffix
				464	/// octal-literal ud-suffix
				465	/// hexadecimal-literal ud-suffix
Richard Smith	f4198b7	2013-07-23 08:14:48 +0000	[diff] [blame]	466	/// binary-literal ud-suffix [GNU, C++1y]
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	467	/// decimal-constant:
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	468	/// nonzero-digit
				469	/// decimal-constant digit
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	470	/// octal-constant:
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	471	/// 0
				472	/// octal-constant octal-digit
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	473	/// hexadecimal-constant:
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	474	/// hexadecimal-prefix hexadecimal-digit
				475	/// hexadecimal-constant hexadecimal-digit
				476	/// hexadecimal-prefix: one of
				477	/// 0x 0X
Richard Smith	f4198b7	2013-07-23 08:14:48 +0000	[diff] [blame]	478	/// binary-literal:
				479	/// 0b binary-digit
				480	/// 0B binary-digit
				481	/// binary-literal binary-digit
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	482	/// integer-suffix:
				483	/// unsigned-suffix [long-suffix]
				484	/// unsigned-suffix [long-long-suffix]
				485	/// long-suffix [unsigned-suffix]
				486	/// long-long-suffix [unsigned-suffix]
				487	/// nonzero-digit:
				488	/// 1 2 3 4 5 6 7 8 9
				489	/// octal-digit:
				490	/// 0 1 2 3 4 5 6 7
				491	/// hexadecimal-digit:
				492	/// 0 1 2 3 4 5 6 7 8 9
				493	/// a b c d e f
				494	/// A B C D E F
Richard Smith	f4198b7	2013-07-23 08:14:48 +0000	[diff] [blame]	495	/// binary-digit:
				496	/// 0
				497	/// 1
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	498	/// unsigned-suffix: one of
				499	/// u U
				500	/// long-suffix: one of
				501	/// l L
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	502	/// long-long-suffix: one of
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	503	/// ll LL
				504	///
				505	/// floating-constant: [C99 6.4.4.2]
				506	/// TODO: add rules...
				507	///
Dmitri Gribenko	7ba9172	2012-09-24 09:53:54 +0000	[diff] [blame]	508	NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
				509	SourceLocation TokLoc,
				510	Preprocessor &PP)
				511	: PP(PP), ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	512
Chris Lattner	59f09b6	2008-09-30 20:45:40 +0000	[diff] [blame]	513	// This routine assumes that the range begin/end matches the regex for integer
				514	// and FP constants (specifically, the 'pp-number' regex), and assumes that
				515	// the byte at "*end" is both valid and not part of the regex. Because of
				516	// this, it doesn't have to check for 'overscan' in various places.
Jordan Rose	a7d0384	2013-02-08 22:30:41 +0000	[diff] [blame]	517	assert(!isPreprocessingNumberBody(*ThisTokEnd) && "didn't maximally munch?");
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	518
Dmitri Gribenko	7ba9172	2012-09-24 09:53:54 +0000	[diff] [blame]	519	s = DigitsBegin = ThisTokBegin;
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	520	saw_exponent = false;
				521	saw_period = false;
Richard Smith	39570d00	2012-03-08 08:45:32 +0000	[diff] [blame]	522	saw_ud_suffix = false;
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	523	isLong = false;
				524	isUnsigned = false;
				525	isLongLong = false;
Anastasia Stulova	5c1a2c5	2016-02-17 11:34:37 +0000	[diff] [blame]	526	isHalf = false;
Chris Lattner	ed04542	2007-08-26 03:29:23 +0000	[diff] [blame]	527	isFloat = false;
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	528	isImaginary = false;
Nemanja Ivanovic	bb1ea2d	2016-05-09 08:52:33 +0000	[diff] [blame]	529	isFloat128 = false;
David Majnemer	65a407c	2014-06-21 18:46:07 +0000	[diff] [blame]	530	MicrosoftInteger = 0;
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	531	hadError = false;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	532
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	533	if (*s == '0') { // parse radix
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	534	ParseNumberStartingWithZero(TokLoc);
				535	if (hadError)
				536	return;
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	537	} else { // the first digit is non-zero
				538	radix = 10;
				539	s = SkipDigits(s);
				540	if (s == ThisTokEnd) {
Chris Lattner	328fa5c	2007-06-08 17:12:06 +0000	[diff] [blame]	541	// Done.
Craig Topper	3efc7c0	2016-01-28 05:22:54 +0000	[diff] [blame]	542	} else {
				543	ParseDecimalOrOctalCommon(TokLoc);
				544	if (hadError)
Chris Lattner	48a9b9b	2008-04-20 18:41:46 +0000	[diff] [blame]	545	return;
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	546	}
				547	}
				548
				549	SuffixBegin = s;
Richard Smith	1e13048	2013-09-26 04:19:11 +0000	[diff] [blame]	550	checkSeparator(TokLoc, s, CSK_AfterDigits);
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	551
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	552	// Parse the suffix. At this point we can classify whether we have an FP or
				553	// integer constant.
				554	bool isFPConstant = isFloatingLiteral();
Craig Topper	d2d442c	2014-05-17 23:10:59 +0000	[diff] [blame]	555	const char *ImaginarySuffixLoc = nullptr;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	556
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	557	// Loop over all of the characters of the suffix. If we see something bad,
				558	// we break out of the loop.
				559	for (; s != ThisTokEnd; ++s) {
				560	switch (*s) {
Anastasia Stulova	5c1a2c5	2016-02-17 11:34:37 +0000	[diff] [blame]	561	case 'h': // FP Suffix for "half".
				562	case 'H':
				563	// OpenCL Extension v1.2 s9.5 - h or H suffix for half type.
				564	if (!PP.getLangOpts().Half) break;
				565	if (!isFPConstant) break; // Error for integer constant.
				566	if (isHalf \|\| isFloat \|\| isLong) break; // HH, FH, LH invalid.
				567	isHalf = true;
				568	continue; // Success.
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	569	case 'f': // FP Suffix for "float"
				570	case 'F':
				571	if (!isFPConstant) break; // Error for integer constant.
Nemanja Ivanovic	bb1ea2d	2016-05-09 08:52:33 +0000	[diff] [blame]	572	if (isHalf \|\| isFloat \|\| isLong \|\| isFloat128)
				573	break; // HF, FF, LF, QF invalid.
Chris Lattner	ed04542	2007-08-26 03:29:23 +0000	[diff] [blame]	574	isFloat = true;
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	575	continue; // Success.
Nemanja Ivanovic	bb1ea2d	2016-05-09 08:52:33 +0000	[diff] [blame]	576	case 'q': // FP Suffix for "__float128"
				577	case 'Q':
				578	if (!isFPConstant) break; // Error for integer constant.
				579	if (isHalf \|\| isFloat \|\| isLong \|\| isFloat128)
				580	break; // HQ, FQ, LQ, QQ invalid.
				581	isFloat128 = true;
				582	continue; // Success.
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	583	case 'u':
				584	case 'U':
				585	if (isFPConstant) break; // Error for floating constant.
				586	if (isUnsigned) break; // Cannot be repeated.
				587	isUnsigned = true;
				588	continue; // Success.
				589	case 'l':
				590	case 'L':
				591	if (isLong \|\| isLongLong) break; // Cannot be repeated.
Nemanja Ivanovic	bb1ea2d	2016-05-09 08:52:33 +0000	[diff] [blame]	592	if (isHalf \|\| isFloat \|\| isFloat128) break; // LH, LF, LQ invalid.
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	593
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	594	// Check for long long. The L's need to be adjacent and the same case.
Benjamin Kramer	7fd8838	2015-03-29 14:11:22 +0000	[diff] [blame]	595	if (s[1] == s[0]) {
				596	assert(s + 1 < ThisTokEnd && "didn't maximally munch?");
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	597	if (isFPConstant) break; // long long invalid for floats.
				598	isLongLong = true;
				599	++s; // Eat both of them.
				600	} else {
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	601	isLong = true;
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	602	}
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	603	continue; // Success.
				604	case 'i':
Chris Lattner	26f6c22	2010-10-14 00:24:10 +0000	[diff] [blame]	605	case 'I':
David Blaikie	bbafb8a	2012-03-11 07:00:24 +0000	[diff] [blame]	606	if (PP.getLangOpts().MicrosoftExt) {
David Majnemer	65a407c	2014-06-21 18:46:07 +0000	[diff] [blame]	607	if (isLong \|\| isLongLong \|\| MicrosoftInteger)
				608	break;
Nuno Lopes	baa1bc4	2009-11-28 13:37:52 +0000	[diff] [blame]	609
Benjamin Kramer	7fd8838	2015-03-29 14:11:22 +0000	[diff] [blame]	610	if (!isFPConstant) {
David Majnemer	5055dfc	2015-07-26 09:02:26 +0000	[diff] [blame]	611	// Allow i8, i16, i32, and i64.
Mike Stump	c99c022	2009-10-08 22:55:36 +0000	[diff] [blame]	612	switch (s[1]) {
Benjamin Kramer	7fd8838	2015-03-29 14:11:22 +0000	[diff] [blame]	613	case '8':
				614	s += 2; // i8 suffix
				615	MicrosoftInteger = 8;
Peter Collingbourne	efe09b4	2014-05-29 23:10:15 +0000	[diff] [blame]	616	break;
Benjamin Kramer	7fd8838	2015-03-29 14:11:22 +0000	[diff] [blame]	617	case '1':
				618	if (s[2] == '6') {
				619	s += 3; // i16 suffix
				620	MicrosoftInteger = 16;
Benjamin Kramer	7fd8838	2015-03-29 14:11:22 +0000	[diff] [blame]	621	}
				622	break;
				623	case '3':
				624	if (s[2] == '2') {
				625	s += 3; // i32 suffix
				626	MicrosoftInteger = 32;
				627	}
				628	break;
				629	case '6':
				630	if (s[2] == '4') {
				631	s += 3; // i64 suffix
				632	MicrosoftInteger = 64;
				633	}
				634	break;
				635	default:
				636	break;
				637	}
				638	}
				639	if (MicrosoftInteger) {
				640	assert(s <= ThisTokEnd && "didn't maximally munch?");
				641	break;
Steve Naroff	a1f4145	2008-04-04 21:02:54 +0000	[diff] [blame]	642	}
Steve Naroff	a1f4145	2008-04-04 21:02:54 +0000	[diff] [blame]	643	}
Richard Smith	2a98862	2013-09-24 04:06:10 +0000	[diff] [blame]	644	// "i", "if", and "il" are user-defined suffixes in C++1y.
Benjamin Kramer	7fd8838	2015-03-29 14:11:22 +0000	[diff] [blame]	645	if (*s == 'i' && PP.getLangOpts().CPlusPlus14)
Richard Smith	2a98862	2013-09-24 04:06:10 +0000	[diff] [blame]	646	break;
Steve Naroff	a1f4145	2008-04-04 21:02:54 +0000	[diff] [blame]	647	// fall through.
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	648	case 'j':
				649	case 'J':
				650	if (isImaginary) break; // Cannot be repeated.
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	651	isImaginary = true;
Richard Smith	f4198b7	2013-07-23 08:14:48 +0000	[diff] [blame]	652	ImaginarySuffixLoc = s;
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	653	continue; // Success.
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	654	}
Richard Smith	39570d00	2012-03-08 08:45:32 +0000	[diff] [blame]	655	// If we reached here, there was an error or a ud-suffix.
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	656	break;
				657	}
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	658
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	659	if (s != ThisTokEnd) {
Richard Smith	8b7258b	2014-02-17 21:52:30 +0000	[diff] [blame]	660	// FIXME: Don't bother expanding UCNs if !tok.hasUCN().
				661	expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
				662	if (isValidUDSuffix(PP.getLangOpts(), UDSuffixBuf)) {
Richard Smith	f4198b7	2013-07-23 08:14:48 +0000	[diff] [blame]	663	// Any suffix pieces we might have parsed are actually part of the
				664	// ud-suffix.
				665	isLong = false;
				666	isUnsigned = false;
				667	isLongLong = false;
				668	isFloat = false;
Anastasia Stulova	5c1a2c5	2016-02-17 11:34:37 +0000	[diff] [blame]	669	isHalf = false;
Richard Smith	f4198b7	2013-07-23 08:14:48 +0000	[diff] [blame]	670	isImaginary = false;
David Majnemer	65a407c	2014-06-21 18:46:07 +0000	[diff] [blame]	671	MicrosoftInteger = 0;
Richard Smith	f4198b7	2013-07-23 08:14:48 +0000	[diff] [blame]	672
Richard Smith	39570d00	2012-03-08 08:45:32 +0000	[diff] [blame]	673	saw_ud_suffix = true;
				674	return;
				675	}
				676
				677	// Report an error if there are any.
Dmitri Gribenko	7ba9172	2012-09-24 09:53:54 +0000	[diff] [blame]	678	PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin),
Craig Topper	71a51ff	2015-11-12 07:36:50 +0000	[diff] [blame]	679	diag::err_invalid_suffix_constant)
				680	<< StringRef(SuffixBegin, ThisTokEnd-SuffixBegin) << isFPConstant;
Chris Lattner	59acca5	2008-11-22 07:23:31 +0000	[diff] [blame]	681	hadError = true;
Chris Lattner	f55ab18	2007-08-26 01:58:14 +0000	[diff] [blame]	682	return;
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	683	}
Richard Smith	f4198b7	2013-07-23 08:14:48 +0000	[diff] [blame]	684
				685	if (isImaginary) {
				686	PP.Diag(PP.AdvanceToTokenCharacter(TokLoc,
				687	ImaginarySuffixLoc - ThisTokBegin),
				688	diag::ext_imaginary_constant);
				689	}
				690	}
				691
Craig Topper	3efc7c0	2016-01-28 05:22:54 +0000	[diff] [blame]	692	/// ParseDecimalOrOctalCommon - This method is called for decimal or octal
				693	/// numbers. It issues an error for illegal digits, and handles floating point
				694	/// parsing. If it detects a floating point number, the radix is set to 10.
				695	void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){
				696	assert((radix == 8 \|\| radix == 10) && "Unexpected radix");
				697
				698	// If we have a hex digit other than 'e' (which denotes a FP exponent) then
				699	// the code is using an incorrect base.
				700	if (isHexDigit(s) && s != 'e' && *s != 'E') {
				701	PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
				702	diag::err_invalid_digit) << StringRef(s, 1) << (radix == 8 ? 1 : 0);
				703	hadError = true;
				704	return;
				705	}
				706
				707	if (*s == '.') {
				708	checkSeparator(TokLoc, s, CSK_AfterDigits);
				709	s++;
				710	radix = 10;
				711	saw_period = true;
				712	checkSeparator(TokLoc, s, CSK_BeforeDigits);
				713	s = SkipDigits(s); // Skip suffix.
				714	}
				715	if (s == 'e' \|\| s == 'E') { // exponent
				716	checkSeparator(TokLoc, s, CSK_AfterDigits);
				717	const char *Exponent = s;
				718	s++;
				719	radix = 10;
				720	saw_exponent = true;
				721	if (s == '+' \|\| s == '-') s++; // sign
				722	const char *first_non_digit = SkipDigits(s);
Richard Smith	b1cba3e	2016-02-09 22:34:35 +0000	[diff] [blame]	723	if (containsDigits(s, first_non_digit)) {
Craig Topper	3efc7c0	2016-01-28 05:22:54 +0000	[diff] [blame]	724	checkSeparator(TokLoc, s, CSK_BeforeDigits);
				725	s = first_non_digit;
				726	} else {
				727	PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
				728	diag::err_exponent_has_no_digits);
				729	hadError = true;
				730	return;
				731	}
				732	}
				733	}
				734
Richard Smith	f4198b7	2013-07-23 08:14:48 +0000	[diff] [blame]	735	/// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
				736	/// suffixes as ud-suffixes, because the diagnostic experience is better if we
				737	/// treat it as an invalid suffix.
				738	bool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
				739	StringRef Suffix) {
				740	if (!LangOpts.CPlusPlus11 \|\| Suffix.empty())
				741	return false;
				742
				743	// By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
				744	if (Suffix[0] == '_')
				745	return true;
				746
				747	// In C++11, there are no library suffixes.
Aaron Ballman	dd69ef3	2014-08-19 15:55:55 +0000	[diff] [blame]	748	if (!LangOpts.CPlusPlus14)
Richard Smith	f4198b7	2013-07-23 08:14:48 +0000	[diff] [blame]	749	return false;
				750
				751	// In C++1y, "s", "h", "min", "ms", "us", and "ns" are used in the library.
Richard Smith	2a98862	2013-09-24 04:06:10 +0000	[diff] [blame]	752	// Per tweaked N3660, "il", "i", and "if" are also used in the library.
Richard Smith	f4198b7	2013-07-23 08:14:48 +0000	[diff] [blame]	753	return llvm::StringSwitch<bool>(Suffix)
				754	.Cases("h", "min", "s", true)
				755	.Cases("ms", "us", "ns", true)
Richard Smith	2a98862	2013-09-24 04:06:10 +0000	[diff] [blame]	756	.Cases("il", "i", "if", true)
Richard Smith	f4198b7	2013-07-23 08:14:48 +0000	[diff] [blame]	757	.Default(false);
Steve Naroff	09ef474	2007-03-09 23:16:33 +0000	[diff] [blame]	758	}
				759
Richard Smith	fde9485	2013-09-26 03:33:06 +0000	[diff] [blame]	760	void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,
Richard Smith	1e13048	2013-09-26 04:19:11 +0000	[diff] [blame]	761	const char *Pos,
				762	CheckSeparatorKind IsAfterDigits) {
				763	if (IsAfterDigits == CSK_AfterDigits) {
Richard Smith	99dc071	2013-09-26 05:57:03 +0000	[diff] [blame]	764	if (Pos == ThisTokBegin)
				765	return;
Richard Smith	fde9485	2013-09-26 03:33:06 +0000	[diff] [blame]	766	--Pos;
Richard Smith	99dc071	2013-09-26 05:57:03 +0000	[diff] [blame]	767	} else if (Pos == ThisTokEnd)
				768	return;
Richard Smith	fde9485	2013-09-26 03:33:06 +0000	[diff] [blame]	769
				770	if (isDigitSeparator(*Pos))
				771	PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin),
				772	diag::err_digit_separator_not_between_digits)
				773	<< IsAfterDigits;
				774	}
				775
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	776	/// ParseNumberStartingWithZero - This method is called when the first character
				777	/// of the number is found to be a zero. This means it is either an octal
				778	/// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	779	/// a floating point number (01239.123e4). Eat the prefix, determining the
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	780	/// radix etc.
				781	void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
				782	assert(s[0] == '0' && "Invalid method call");
				783	s++;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	784
NAKAMURA Takumi	f2bc8f3	2013-09-27 04:42:28 +0000	[diff] [blame]	785	int c1 = s[0];
NAKAMURA Takumi	f2bc8f3	2013-09-27 04:42:28 +0000	[diff] [blame]	786
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	787	// Handle a hex number like 0x1234.
Benjamin Kramer	8671028	2015-03-29 14:11:37 +0000	[diff] [blame]	788	if ((c1 == 'x' \|\| c1 == 'X') && (isHexDigit(s[1]) \|\| s[1] == '.')) {
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	789	s++;
Benjamin Kramer	8671028	2015-03-29 14:11:37 +0000	[diff] [blame]	790	assert(s < ThisTokEnd && "didn't maximally munch?");
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	791	radix = 16;
				792	DigitsBegin = s;
				793	s = SkipHexDigits(s);
Richard Smith	b1cba3e	2016-02-09 22:34:35 +0000	[diff] [blame]	794	bool HasSignificandDigits = containsDigits(DigitsBegin, s);
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	795	if (s == ThisTokEnd) {
				796	// Done.
				797	} else if (*s == '.') {
				798	s++;
				799	saw_period = true;
Aaron Ballman	e1224a5	2012-02-08 13:36:33 +0000	[diff] [blame]	800	const char *floatDigitsBegin = s;
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	801	s = SkipHexDigits(s);
Richard Smith	b1cba3e	2016-02-09 22:34:35 +0000	[diff] [blame]	802	if (containsDigits(floatDigitsBegin, s))
				803	HasSignificandDigits = true;
				804	if (HasSignificandDigits)
				805	checkSeparator(TokLoc, floatDigitsBegin, CSK_BeforeDigits);
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	806	}
Aaron Ballman	e1224a5	2012-02-08 13:36:33 +0000	[diff] [blame]	807
Richard Smith	b1cba3e	2016-02-09 22:34:35 +0000	[diff] [blame]	808	if (!HasSignificandDigits) {
Dmitri Gribenko	7ba9172	2012-09-24 09:53:54 +0000	[diff] [blame]	809	PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
Richard Smith	560a357	2016-03-04 22:32:06 +0000	[diff] [blame]	810	diag::err_hex_constant_requires)
				811	<< PP.getLangOpts().CPlusPlus << 1;
Aaron Ballman	e1224a5	2012-02-08 13:36:33 +0000	[diff] [blame]	812	hadError = true;
				813	return;
				814	}
				815
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	816	// A binary exponent can appear with or with a '.'. If dotted, the
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	817	// binary exponent is required.
Douglas Gregor	86325ad	2011-08-30 22:40:35 +0000	[diff] [blame]	818	if (s == 'p' \|\| s == 'P') {
Richard Smith	70ee92f	2014-04-22 23:50:25 +0000	[diff] [blame]	819	checkSeparator(TokLoc, s, CSK_AfterDigits);
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	820	const char *Exponent = s;
				821	s++;
				822	saw_exponent = true;
				823	if (s == '+' \|\| s == '-') s++; // sign
				824	const char *first_non_digit = SkipDigits(s);
Richard Smith	b1cba3e	2016-02-09 22:34:35 +0000	[diff] [blame]	825	if (!containsDigits(s, first_non_digit)) {
Chris Lattner	59acca5	2008-11-22 07:23:31 +0000	[diff] [blame]	826	PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
				827	diag::err_exponent_has_no_digits);
				828	hadError = true;
Chris Lattner	c94ad4a	2008-07-25 18:18:34 +0000	[diff] [blame]	829	return;
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	830	}
Richard Smith	70ee92f	2014-04-22 23:50:25 +0000	[diff] [blame]	831	checkSeparator(TokLoc, s, CSK_BeforeDigits);
Chris Lattner	c94ad4a	2008-07-25 18:18:34 +0000	[diff] [blame]	832	s = first_non_digit;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	833
David Blaikie	bbafb8a	2012-03-11 07:00:24 +0000	[diff] [blame]	834	if (!PP.getLangOpts().HexFloats)
Richard Smith	560a357	2016-03-04 22:32:06 +0000	[diff] [blame]	835	PP.Diag(TokLoc, PP.getLangOpts().CPlusPlus
				836	? diag::ext_hex_literal_invalid
				837	: diag::ext_hex_constant_invalid);
				838	else if (PP.getLangOpts().CPlusPlus1z)
				839	PP.Diag(TokLoc, diag::warn_cxx1z_hex_literal);
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	840	} else if (saw_period) {
Richard Smith	560a357	2016-03-04 22:32:06 +0000	[diff] [blame]	841	PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
				842	diag::err_hex_constant_requires)
				843	<< PP.getLangOpts().CPlusPlus << 0;
Chris Lattner	59acca5	2008-11-22 07:23:31 +0000	[diff] [blame]	844	hadError = true;
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	845	}
				846	return;
				847	}
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	848
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	849	// Handle simple binary numbers 0b01010
Benjamin Kramer	8671028	2015-03-29 14:11:37 +0000	[diff] [blame]	850	if ((c1 == 'b' \|\| c1 == 'B') && (s[1] == '0' \|\| s[1] == '1')) {
Richard Smith	c5c27f2	2013-04-19 20:47:20 +0000	[diff] [blame]	851	// 0b101010 is a C++1y / GCC extension.
				852	PP.Diag(TokLoc,
Aaron Ballman	dd69ef3	2014-08-19 15:55:55 +0000	[diff] [blame]	853	PP.getLangOpts().CPlusPlus14
Richard Smith	c5c27f2	2013-04-19 20:47:20 +0000	[diff] [blame]	854	? diag::warn_cxx11_compat_binary_literal
				855	: PP.getLangOpts().CPlusPlus
Aaron Ballman	dd69ef3	2014-08-19 15:55:55 +0000	[diff] [blame]	856	? diag::ext_binary_literal_cxx14
Richard Smith	c5c27f2	2013-04-19 20:47:20 +0000	[diff] [blame]	857	: diag::ext_binary_literal);
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	858	++s;
Benjamin Kramer	8671028	2015-03-29 14:11:37 +0000	[diff] [blame]	859	assert(s < ThisTokEnd && "didn't maximally munch?");
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	860	radix = 2;
				861	DigitsBegin = s;
				862	s = SkipBinaryDigits(s);
				863	if (s == ThisTokEnd) {
				864	// Done.
Jordan Rose	a7d0384	2013-02-08 22:30:41 +0000	[diff] [blame]	865	} else if (isHexDigit(*s)) {
Chris Lattner	59acca5	2008-11-22 07:23:31 +0000	[diff] [blame]	866	PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
Craig Topper	7f5ff21	2015-11-14 02:09:55 +0000	[diff] [blame]	867	diag::err_invalid_digit) << StringRef(s, 1) << 2;
Chris Lattner	59acca5	2008-11-22 07:23:31 +0000	[diff] [blame]	868	hadError = true;
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	869	}
Chris Lattner	d68c04f	2008-06-30 06:44:49 +0000	[diff] [blame]	870	// Other suffixes will be diagnosed by the caller.
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	871	return;
				872	}
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	873
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	874	// For now, the radix is set to 8. If we discover that we have a
				875	// floating point constant, the radix will change to 10. Octal floating
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	876	// point constants are not permitted (only decimal and hexadecimal).
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	877	radix = 8;
				878	DigitsBegin = s;
				879	s = SkipOctalDigits(s);
				880	if (s == ThisTokEnd)
				881	return; // Done, simple octal number like 01234
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	882
Chris Lattner	d68c04f	2008-06-30 06:44:49 +0000	[diff] [blame]	883	// If we have some other non-octal digit that is a decimal digit, see if
				884	// this is part of a floating point number like 094.123 or 09e1.
Jordan Rose	a7d0384	2013-02-08 22:30:41 +0000	[diff] [blame]	885	if (isDigit(*s)) {
Chris Lattner	d68c04f	2008-06-30 06:44:49 +0000	[diff] [blame]	886	const char *EndDecimal = SkipDigits(s);
				887	if (EndDecimal[0] == '.' \|\| EndDecimal[0] == 'e' \|\| EndDecimal[0] == 'E') {
				888	s = EndDecimal;
				889	radix = 10;
				890	}
				891	}
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	892
Craig Topper	3efc7c0	2016-01-28 05:22:54 +0000	[diff] [blame]	893	ParseDecimalOrOctalCommon(TokLoc);
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	894	}
				895
Jordan Rose	de584de	2012-09-25 22:32:51 +0000	[diff] [blame]	896	static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits) {
Dmitri Gribenko	511288b	2012-09-25 19:09:15 +0000	[diff] [blame]	897	switch (Radix) {
				898	case 2:
				899	return NumDigits <= 64;
				900	case 8:
				901	return NumDigits <= 64 / 3; // Digits are groups of 3 bits.
				902	case 10:
				903	return NumDigits <= 19; // floor(log10(2^64))
				904	case 16:
				905	return NumDigits <= 64 / 4; // Digits are groups of 4 bits.
				906	default:
				907	llvm_unreachable("impossible Radix");
				908	}
				909	}
Chris Lattner	6016a51	2008-06-30 06:39:54 +0000	[diff] [blame]	910
Chris Lattner	5b743d3	2007-04-04 05:52:58 +0000	[diff] [blame]	911	/// GetIntegerValue - Convert this numeric literal value to an APInt that
Chris Lattner	871b4e1	2007-04-04 06:36:34 +0000	[diff] [blame]	912	/// matches Val's input width. If there is an overflow, set Val to the low bits
				913	/// of the result and return true. Otherwise, return false.
Chris Lattner	23b7eb6	2007-06-15 23:05:46 +0000	[diff] [blame]	914	bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
Daniel Dunbar	be94708	2008-10-16 07:32:01 +0000	[diff] [blame]	915	// Fast path: Compute a conservative bound on the maximum number of
				916	// bits per digit in this radix. If we can't possibly overflow a
				917	// uint64 based on that bound then do the simple conversion to
				918	// integer. This avoids the expensive overflow checking below, and
				919	// handles the common cases that matter (small decimal integers and
				920	// hex/octal values which don't overflow).
Dmitri Gribenko	511288b	2012-09-25 19:09:15 +0000	[diff] [blame]	921	const unsigned NumDigits = SuffixBegin - DigitsBegin;
Jordan Rose	de584de	2012-09-25 22:32:51 +0000	[diff] [blame]	922	if (alwaysFitsInto64Bits(radix, NumDigits)) {
Daniel Dunbar	be94708	2008-10-16 07:32:01 +0000	[diff] [blame]	923	uint64_t N = 0;
Dmitri Gribenko	511288b	2012-09-25 19:09:15 +0000	[diff] [blame]	924	for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)
Richard Smith	fde9485	2013-09-26 03:33:06 +0000	[diff] [blame]	925	if (!isDigitSeparator(*Ptr))
				926	N = N * radix + llvm::hexDigitValue(*Ptr);
Daniel Dunbar	be94708	2008-10-16 07:32:01 +0000	[diff] [blame]	927
				928	// This will truncate the value to Val's input width. Simply check
				929	// for overflow by comparing.
				930	Val = N;
				931	return Val.getZExtValue() != N;
				932	}
				933
Chris Lattner	5b743d3	2007-04-04 05:52:58 +0000	[diff] [blame]	934	Val = 0;
Dmitri Gribenko	511288b	2012-09-25 19:09:15 +0000	[diff] [blame]	935	const char *Ptr = DigitsBegin;
Chris Lattner	5b743d3	2007-04-04 05:52:58 +0000	[diff] [blame]	936
Chris Lattner	23b7eb6	2007-06-15 23:05:46 +0000	[diff] [blame]	937	llvm::APInt RadixVal(Val.getBitWidth(), radix);
				938	llvm::APInt CharVal(Val.getBitWidth(), 0);
				939	llvm::APInt OldVal = Val;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	940
Chris Lattner	871b4e1	2007-04-04 06:36:34 +0000	[diff] [blame]	941	bool OverflowOccurred = false;
Dmitri Gribenko	511288b	2012-09-25 19:09:15 +0000	[diff] [blame]	942	while (Ptr < SuffixBegin) {
Richard Smith	fde9485	2013-09-26 03:33:06 +0000	[diff] [blame]	943	if (isDigitSeparator(*Ptr)) {
				944	++Ptr;
				945	continue;
				946	}
				947
Jordan Rose	78ed86a	2013-01-18 22:33:58 +0000	[diff] [blame]	948	unsigned C = llvm::hexDigitValue(*Ptr++);
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	949
Chris Lattner	5b743d3	2007-04-04 05:52:58 +0000	[diff] [blame]	950	// If this letter is out of bound for this radix, reject it.
Chris Lattner	531efa4	2007-04-04 06:49:26 +0000	[diff] [blame]	951	assert(C < radix && "NumericLiteralParser ctor should have rejected this");
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	952
Chris Lattner	5b743d3	2007-04-04 05:52:58 +0000	[diff] [blame]	953	CharVal = C;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	954
Chris Lattner	871b4e1	2007-04-04 06:36:34 +0000	[diff] [blame]	955	// Add the digit to the value in the appropriate radix. If adding in digits
				956	// made the value smaller, then this overflowed.
Chris Lattner	5b743d3	2007-04-04 05:52:58 +0000	[diff] [blame]	957	OldVal = Val;
Chris Lattner	871b4e1	2007-04-04 06:36:34 +0000	[diff] [blame]	958
				959	// Multiply by radix, did overflow occur on the multiply?
Chris Lattner	5b743d3	2007-04-04 05:52:58 +0000	[diff] [blame]	960	Val *= RadixVal;
Chris Lattner	871b4e1	2007-04-04 06:36:34 +0000	[diff] [blame]	961	OverflowOccurred \|= Val.udiv(RadixVal) != OldVal;
				962
Chris Lattner	871b4e1	2007-04-04 06:36:34 +0000	[diff] [blame]	963	// Add value, did overflow occur on the value?
Daniel Dunbar	b1f6442	2008-10-16 06:39:30 +0000	[diff] [blame]	964	// (a + b) ult b <=> overflow
Chris Lattner	5b743d3	2007-04-04 05:52:58 +0000	[diff] [blame]	965	Val += CharVal;
Chris Lattner	871b4e1	2007-04-04 06:36:34 +0000	[diff] [blame]	966	OverflowOccurred \|= Val.ult(CharVal);
Chris Lattner	5b743d3	2007-04-04 05:52:58 +0000	[diff] [blame]	967	}
Chris Lattner	871b4e1	2007-04-04 06:36:34 +0000	[diff] [blame]	968	return OverflowOccurred;
Chris Lattner	5b743d3	2007-04-04 05:52:58 +0000	[diff] [blame]	969	}
				970
John McCall	53b93a0	2009-12-24 09:08:04 +0000	[diff] [blame]	971	llvm::APFloat::opStatus
				972	NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
Ted Kremenek	fbb08bc	2007-11-26 23:12:30 +0000	[diff] [blame]	973	using llvm::APFloat;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	974
Erick Tryzelaar	b907311	2009-08-16 23:36:28 +0000	[diff] [blame]	975	unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
Richard Smith	fde9485	2013-09-26 03:33:06 +0000	[diff] [blame]	976
				977	llvm::SmallString<16> Buffer;
				978	StringRef Str(ThisTokBegin, n);
				979	if (Str.find('\'') != StringRef::npos) {
				980	Buffer.reserve(n);
				981	std::remove_copy_if(Str.begin(), Str.end(), std::back_inserter(Buffer),
				982	&isDigitSeparator);
				983	Str = Buffer;
				984	}
				985
				986	return Result.convertFromString(Str, APFloat::rmNearestTiesToEven);
Steve Naroff	97b9e91	2007-07-09 23:53:58 +0000	[diff] [blame]	987	}
Chris Lattner	5b743d3	2007-04-04 05:52:58 +0000	[diff] [blame]	988
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	989
James Dennett	1cc2203	2012-06-17 03:34:42 +0000	[diff] [blame]	990	/// \verbatim
Richard Smith	e18f0fa	2012-03-05 04:02:15 +0000	[diff] [blame]	991	/// user-defined-character-literal: [C++11 lex.ext]
				992	/// character-literal ud-suffix
				993	/// ud-suffix:
				994	/// identifier
				995	/// character-literal: [C++11 lex.ccon]
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	996	/// ' c-char-sequence '
				997	/// u' c-char-sequence '
				998	/// U' c-char-sequence '
				999	/// L' c-char-sequence '
Aaron Ballman	9a17c85	2016-01-07 20:59:26 +0000	[diff] [blame]	1000	/// u8' c-char-sequence ' [C++1z lex.ccon]
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1001	/// c-char-sequence:
				1002	/// c-char
				1003	/// c-char-sequence c-char
				1004	/// c-char:
				1005	/// any member of the source character set except the single-quote ',
				1006	/// backslash \, or new-line character
				1007	/// escape-sequence
				1008	/// universal-character-name
Richard Smith	e18f0fa	2012-03-05 04:02:15 +0000	[diff] [blame]	1009	/// escape-sequence:
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1010	/// simple-escape-sequence
				1011	/// octal-escape-sequence
				1012	/// hexadecimal-escape-sequence
				1013	/// simple-escape-sequence:
NAKAMURA Takumi	9f8a02d	2011-08-12 05:49:51 +0000	[diff] [blame]	1014	/// one of \' \" \? \\ \a \b \f \n \r \t \v
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1015	/// octal-escape-sequence:
				1016	/// \ octal-digit
				1017	/// \ octal-digit octal-digit
				1018	/// \ octal-digit octal-digit octal-digit
				1019	/// hexadecimal-escape-sequence:
				1020	/// \x hexadecimal-digit
				1021	/// hexadecimal-escape-sequence hexadecimal-digit
Richard Smith	e18f0fa	2012-03-05 04:02:15 +0000	[diff] [blame]	1022	/// universal-character-name: [C++11 lex.charset]
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1023	/// \u hex-quad
				1024	/// \U hex-quad hex-quad
				1025	/// hex-quad:
				1026	/// hex-digit hex-digit hex-digit hex-digit
James Dennett	1cc2203	2012-06-17 03:34:42 +0000	[diff] [blame]	1027	/// \endverbatim
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1028	///
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	1029	CharLiteralParser::CharLiteralParser(const char begin, const char end,
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1030	SourceLocation Loc, Preprocessor &PP,
				1031	tok::TokenKind kind) {
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1032	// At this point we know that the character matches the regex "(L\|u\|U)?'.*'".
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	1033	HadError = false;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1034
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1035	Kind = kind;
				1036
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	1037	const char *TokBegin = begin;
				1038
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1039	// Skip over wide character determinant.
Richard Smith	3e3a705	2014-11-08 06:08:42 +0000	[diff] [blame]	1040	if (Kind != tok::char_constant)
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1041	++begin;
Richard Smith	3e3a705	2014-11-08 06:08:42 +0000	[diff] [blame]	1042	if (Kind == tok::utf8_char_constant)
				1043	++begin;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1044
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	1045	// Skip over the entry quote.
				1046	assert(begin[0] == '\'' && "Invalid token lexed");
				1047	++begin;
				1048
Richard Smith	e18f0fa	2012-03-05 04:02:15 +0000	[diff] [blame]	1049	// Remove an optional ud-suffix.
				1050	if (end[-1] != '\'') {
				1051	const char *UDSuffixEnd = end;
				1052	do {
				1053	--end;
				1054	} while (end[-1] != '\'');
Richard Smith	8b7258b	2014-02-17 21:52:30 +0000	[diff] [blame]	1055	// FIXME: Don't bother with this if !tok.hasUCN().
				1056	expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end));
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	1057	UDSuffixOffset = end - TokBegin;
Richard Smith	e18f0fa	2012-03-05 04:02:15 +0000	[diff] [blame]	1058	}
				1059
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1060	// Trim the ending quote.
Richard Smith	e18f0fa	2012-03-05 04:02:15 +0000	[diff] [blame]	1061	assert(end != begin && "Invalid token lexed");
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1062	--end;
				1063
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1064	// FIXME: The "Value" is an uint64_t so we can handle char literals of
Chris Lattner	57540c5	2011-04-15 05:22:18 +0000	[diff] [blame]	1065	// up to 64-bits.
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	1066	// FIXME: This extensively assumes that 'char' is 8-bits.
Chris Lattner	37e0587	2008-03-05 18:54:05 +0000	[diff] [blame]	1067	assert(PP.getTargetInfo().getCharWidth() == 8 &&
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	1068	"Assumes char is 8 bits");
Chris Lattner	8577f62	2009-04-28 21:51:46 +0000	[diff] [blame]	1069	assert(PP.getTargetInfo().getIntWidth() <= 64 &&
				1070	(PP.getTargetInfo().getIntWidth() & 7) == 0 &&
				1071	"Assumes sizeof(int) on target is <= 64 and a multiple of char");
				1072	assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
				1073	"Assumes sizeof(wchar) on target is <= 64");
Sanjiv Gupta	f09cb95	2009-04-21 02:21:29 +0000	[diff] [blame]	1074
Nick Lewycky	63cc55b	2013-08-21 02:40:19 +0000	[diff] [blame]	1075	SmallVector<uint32_t, 4> codepoint_buffer;
				1076	codepoint_buffer.resize(end - begin);
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1077	uint32_t *buffer_begin = &codepoint_buffer.front();
				1078	uint32_t *buffer_end = buffer_begin + codepoint_buffer.size();
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1079
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1080	// Unicode escapes representing characters that cannot be correctly
				1081	// represented in a single code unit are disallowed in character literals
				1082	// by this implementation.
				1083	uint32_t largest_character_for_kind;
				1084	if (tok::wide_char_constant == Kind) {
Nick Lewycky	63cc55b	2013-08-21 02:40:19 +0000	[diff] [blame]	1085	largest_character_for_kind =
Nick Lewycky	8054f1d	2013-08-21 18:57:51 +0000	[diff] [blame]	1086	0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
Richard Smith	3e3a705	2014-11-08 06:08:42 +0000	[diff] [blame]	1087	} else if (tok::utf8_char_constant == Kind) {
				1088	largest_character_for_kind = 0x7F;
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1089	} else if (tok::utf16_char_constant == Kind) {
				1090	largest_character_for_kind = 0xFFFF;
				1091	} else if (tok::utf32_char_constant == Kind) {
				1092	largest_character_for_kind = 0x10FFFF;
				1093	} else {
				1094	largest_character_for_kind = 0x7Fu;
Chris Lattner	8577f62	2009-04-28 21:51:46 +0000	[diff] [blame]	1095	}
				1096
Nick Lewycky	63cc55b	2013-08-21 02:40:19 +0000	[diff] [blame]	1097	while (begin != end) {
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1098	// Is this a span of non-escape characters?
				1099	if (begin[0] != '\\') {
				1100	char const *start = begin;
				1101	do {
				1102	++begin;
				1103	} while (begin != end && *begin != '\\');
				1104
Eli Friedman	9436352	2012-02-11 05:08:10 +0000	[diff] [blame]	1105	char const *tmp_in_start = start;
				1106	uint32_t *tmp_out_start = buffer_begin;
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1107	ConversionResult res =
Nick Lewycky	63cc55b	2013-08-21 02:40:19 +0000	[diff] [blame]	1108	ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start),
				1109	reinterpret_cast<UTF8 const *>(begin),
				1110	&buffer_begin, buffer_end, strictConversion);
				1111	if (res != conversionOK) {
				1112	// If we see bad encoding for unprefixed character literals, warn and
				1113	// simply copy the byte values, for compatibility with gcc and
Eli Friedman	9436352	2012-02-11 05:08:10 +0000	[diff] [blame]	1114	// older versions of clang.
				1115	bool NoErrorOnBadEncoding = isAscii();
				1116	unsigned Msg = diag::err_bad_character_encoding;
				1117	if (NoErrorOnBadEncoding)
				1118	Msg = diag::warn_bad_character_encoding;
Nick Lewycky	8054f1d	2013-08-21 18:57:51 +0000	[diff] [blame]	1119	PP.Diag(Loc, Msg);
Eli Friedman	9436352	2012-02-11 05:08:10 +0000	[diff] [blame]	1120	if (NoErrorOnBadEncoding) {
				1121	start = tmp_in_start;
				1122	buffer_begin = tmp_out_start;
Nick Lewycky	63cc55b	2013-08-21 02:40:19 +0000	[diff] [blame]	1123	for (; start != begin; ++start, ++buffer_begin)
Eli Friedman	9436352	2012-02-11 05:08:10 +0000	[diff] [blame]	1124	*buffer_begin = static_cast<uint8_t>(*start);
				1125	} else {
				1126	HadError = true;
				1127	}
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1128	} else {
Nick Lewycky	63cc55b	2013-08-21 02:40:19 +0000	[diff] [blame]	1129	for (; tmp_out_start < buffer_begin; ++tmp_out_start) {
Eli Friedman	9436352	2012-02-11 05:08:10 +0000	[diff] [blame]	1130	if (*tmp_out_start > largest_character_for_kind) {
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1131	HadError = true;
				1132	PP.Diag(Loc, diag::err_character_too_large);
				1133	}
				1134	}
				1135	}
				1136
				1137	continue;
				1138	}
Nick Lewycky	63cc55b	2013-08-21 02:40:19 +0000	[diff] [blame]	1139	// Is this a Universal Character Name escape?
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1140	if (begin[1] == 'u' || begin[1] == 'U') {
				1141	unsigned short UcnLen = 0;
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	1142	if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1143	FullSourceLoc(Loc, PP.getSourceManager()),
Nick Lewycky	63cc55b	2013-08-21 02:40:19 +0000	[diff] [blame]	1144	&PP.getDiagnostics(), PP.getLangOpts(), true)) {
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1145	HadError = true;
				1146	} else if (*buffer_begin > largest_character_for_kind) {
				1147	HadError = true;
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1148	PP.Diag(Loc, diag::err_character_too_large);
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1149	}
				1150
				1151	++buffer_begin;
				1152	continue;
				1153	}
				1154	unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
				1155	uint64_t result =
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1156	ProcessCharEscape(TokBegin, begin, end, HadError,
Nick Lewycky	8054f1d	2013-08-21 18:57:51 +0000	[diff] [blame]	1157	FullSourceLoc(Loc,PP.getSourceManager()),
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1158	CharWidth, &PP.getDiagnostics(), PP.getLangOpts());
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1159	*buffer_begin++ = result;
				1160	}
				1161
Nick Lewycky	63cc55b	2013-08-21 02:40:19 +0000	[diff] [blame]	1162	unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1163
Chris Lattner	8577f62	2009-04-28 21:51:46 +0000	[diff] [blame]	1164	if (NumCharsSoFar > 1) {
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1165	if (isWide())
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1166	PP.Diag(Loc, diag::warn_extraneous_char_constant);
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1167	else if (isAscii() && NumCharsSoFar == 4)
				1168	PP.Diag(Loc, diag::ext_four_char_character_literal);
				1169	else if (isAscii())
Chris Lattner	8577f62	2009-04-28 21:51:46 +0000	[diff] [blame]	1170	PP.Diag(Loc, diag::ext_multichar_character_literal);
				1171	else
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1172	PP.Diag(Loc, diag::err_multichar_utf_character_literal);
Eli Friedman	d8cec57	2009-06-01 05:25:02 +0000	[diff] [blame]	1173	IsMultiChar = true;
Nick Lewycky	63cc55b	2013-08-21 02:40:19 +0000	[diff] [blame]	1174	} else {
Daniel Dunbar	a444cc2	2009-07-29 01:46:05 +0000	[diff] [blame]	1175	IsMultiChar = false;
Nick Lewycky	63cc55b	2013-08-21 02:40:19 +0000	[diff] [blame]	1176	}
Sanjiv Gupta	f09cb95	2009-04-21 02:21:29 +0000	[diff] [blame]	1177
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1178	llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
				1179
				1180	// Narrow character literals act as though their value is concatenated
				1181	// in this implementation, but warn on overflow.
				1182	bool multi_char_too_long = false;
				1183	if (isAscii() && isMultiChar()) {
				1184	LitVal = 0;
Nick Lewycky	63cc55b	2013-08-21 02:40:19 +0000	[diff] [blame]	1185	for (size_t i = 0; i < NumCharsSoFar; ++i) {
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1186	// check for enough leading zeros to shift into
				1187	multi_char_too_long |= (LitVal.countLeadingZeros() < 8);
				1188	LitVal <<= 8;
				1189	LitVal = LitVal + (codepoint_buffer[i] & 0xFF);
				1190	}
				1191	} else if (NumCharsSoFar > 0) {
				1192	// otherwise just take the last character
				1193	LitVal = buffer_begin[-1];
				1194	}
				1195
				1196	if (!HadError && multi_char_too_long) {
Nick Lewycky	63cc55b	2013-08-21 02:40:19 +0000	[diff] [blame]	1197	PP.Diag(Loc, diag::warn_char_constant_too_large);
Seth Cantrell	8b2b677	2012-01-18 12:27:04 +0000	[diff] [blame]	1198	}
				1199
Sanjiv Gupta	f09cb95	2009-04-21 02:21:29 +0000	[diff] [blame]	1200	// Transfer the value from APInt to uint64_t
				1201	Value = LitVal.getZExtValue();
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1202
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	1203	// If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
				1204	// if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
				1205	// character constants are not sign extended in this implementation:
				1206	// '\xFF\xFF' = 65535 and '\x0\xFF' = 255, which matches GCC.
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1207	if (isAscii() && NumCharsSoFar == 1 && (Value & 128) &&
David Blaikie	bbafb8a	2012-03-11 07:00:24 +0000	[diff] [blame]	1208	PP.getLangOpts().CharIsSigned)
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	1209	Value = (signed char)Value;
				1210	}
				1211
James Dennett	99c193b	2012-06-19 21:04:25 +0000	[diff] [blame]	1212	/// \verbatim
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1213	/// string-literal: [C++0x lex.string]
				1214	/// encoding-prefix " [s-char-sequence] "
				1215	/// encoding-prefix R raw-string
				1216	/// encoding-prefix:
				1217	/// u8
				1218	/// u
				1219	/// U
				1220	/// L
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1221	/// s-char-sequence:
				1222	/// s-char
				1223	/// s-char-sequence s-char
				1224	/// s-char:
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1225	/// any member of the source character set except the double-quote ",
				1226	/// backslash \, or new-line character
				1227	/// escape-sequence
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1228	/// universal-character-name
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1229	/// raw-string:
				1230	/// " d-char-sequence ( r-char-sequence ) d-char-sequence "
				1231	/// r-char-sequence:
				1232	/// r-char
				1233	/// r-char-sequence r-char
				1234	/// r-char:
				1235	/// any member of the source character set, except a right parenthesis )
				1236	/// followed by the initial d-char-sequence (which may be empty)
				1237	/// followed by a double quote ".
				1238	/// d-char-sequence:
				1239	/// d-char
				1240	/// d-char-sequence d-char
				1241	/// d-char:
				1242	/// any member of the basic source character set except:
				1243	/// space, the left parenthesis (, the right parenthesis ),
				1244	/// the backslash \, and the control characters representing horizontal
				1245	/// tab, vertical tab, form feed, and newline.
				1246	/// escape-sequence: [C++0x lex.ccon]
				1247	/// simple-escape-sequence
				1248	/// octal-escape-sequence
				1249	/// hexadecimal-escape-sequence
				1250	/// simple-escape-sequence:
NAKAMURA Takumi	9f8a02d	2011-08-12 05:49:51 +0000	[diff] [blame]	1251	/// one of \' \" \? \\ \a \b \f \n \r \t \v
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1252	/// octal-escape-sequence:
				1253	/// \ octal-digit
				1254	/// \ octal-digit octal-digit
				1255	/// \ octal-digit octal-digit octal-digit
				1256	/// hexadecimal-escape-sequence:
				1257	/// \x hexadecimal-digit
				1258	/// hexadecimal-escape-sequence hexadecimal-digit
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1259	/// universal-character-name:
				1260	/// \u hex-quad
				1261	/// \U hex-quad hex-quad
				1262	/// hex-quad:
				1263	/// hex-digit hex-digit hex-digit hex-digit
James Dennett	99c193b	2012-06-19 21:04:25 +0000	[diff] [blame]	1264	/// \endverbatim
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	1265	///
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1266	StringLiteralParser::
Craig Topper	9d5583e	2014-06-26 04:58:39 +0000	[diff] [blame]	1267	StringLiteralParser(ArrayRef<Token> StringToks,
Chris Lattner	6bab435	2010-11-17 07:21:13 +0000	[diff] [blame]	1268	Preprocessor &PP, bool Complain)
David Blaikie	bbafb8a	2012-03-11 07:00:24 +0000	[diff] [blame]	1269	: SM(PP.getSourceManager()), Features(PP.getLangOpts()),
Craig Topper	d2d442c	2014-05-17 23:10:59 +0000	[diff] [blame]	1270	Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() :nullptr),
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1271	MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
				1272	ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
Craig Topper	9d5583e	2014-06-26 04:58:39 +0000	[diff] [blame]	1273	init(StringToks);
Chris Lattner	6bab435	2010-11-17 07:21:13 +0000	[diff] [blame]	1274	}
				1275
Craig Topper	9d5583e	2014-06-26 04:58:39 +0000	[diff] [blame]	1276	void StringLiteralParser::init(ArrayRef<Token> StringToks){
Argyrios Kyrtzidis	8b7252a	2011-05-17 22:09:56 +0000	[diff] [blame]	1277	// The literal token may have come from an invalid source location (e.g. due
				1278	// to a PCH error), in which case the token length will be 0.
Craig Topper	9d5583e	2014-06-26 04:58:39 +0000	[diff] [blame]	1279	if (StringToks.empty() || StringToks[0].getLength() < 2)
Argyrios Kyrtzidis	9933e3a	2012-05-03 17:50:32 +0000	[diff] [blame]	1280	return DiagnoseLexingError(SourceLocation());
Argyrios Kyrtzidis	8b7252a	2011-05-17 22:09:56 +0000	[diff] [blame]	1281
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1282	// Scan all of the string portions, remember the max individual token length,
				1283	// computing a bound on the concatenated string length, and see whether any
				1284	// piece is a wide-string. If any of the string portions is a wide-string
				1285	// literal, the result is a wide-string literal [C99 6.4.5p4].
Craig Topper	9d5583e	2014-06-26 04:58:39 +0000	[diff] [blame]	1286	assert(!StringToks.empty() && "expected at least one token");
Alexis Hunt	3b79186	2010-08-30 17:47:05 +0000	[diff] [blame]	1287	MaxTokenLength = StringToks[0].getLength();
Argyrios Kyrtzidis	8b7252a	2011-05-17 22:09:56 +0000	[diff] [blame]	1288	assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
Alexis Hunt	3b79186	2010-08-30 17:47:05 +0000	[diff] [blame]	1289	SizeBound = StringToks[0].getLength()-2; // -2 for "".
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1290	Kind = StringToks[0].getKind();
Alexis Hunt	3b79186	2010-08-30 17:47:05 +0000	[diff] [blame]	1291
				1292	hadError = false;
Chris Lattner	2f5add6	2007-04-05 06:57:15 +0000	[diff] [blame]	1293
				1294	// Implement Translation Phase #6: concatenation of string literals
				1295	/// (C99 5.1.1.2p1). The common case is only one string fragment.
Craig Topper	9d5583e	2014-06-26 04:58:39 +0000	[diff] [blame]	1296	for (unsigned i = 1; i != StringToks.size(); ++i) {
Argyrios Kyrtzidis	9933e3a	2012-05-03 17:50:32 +0000	[diff] [blame]	1297	if (StringToks[i].getLength() < 2)
				1298	return DiagnoseLexingError(StringToks[i].getLocation());
Argyrios Kyrtzidis	8b7252a	2011-05-17 22:09:56 +0000	[diff] [blame]	1299
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1300	// The string could be shorter than this if it needs cleaning, but this is a
				1301	// reasonable bound, which is all we need.
Argyrios Kyrtzidis	8b7252a	2011-05-17 22:09:56 +0000	[diff] [blame]	1302	assert(StringToks[i].getLength() >= 2 && "literal token is invalid!");
Alexis Hunt	3b79186	2010-08-30 17:47:05 +0000	[diff] [blame]	1303	SizeBound += StringToks[i].getLength()-2; // -2 for "".
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1304
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1305	// Remember maximum string piece length.
Alexis Hunt	3b79186	2010-08-30 17:47:05 +0000	[diff] [blame]	1306	if (StringToks[i].getLength() > MaxTokenLength)
				1307	MaxTokenLength = StringToks[i].getLength();
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1308
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1309	// Remember if we see any wide or utf-8/16/32 strings.
				1310	// Also check for illegal concatenations.
				1311	if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
				1312	if (isAscii()) {
				1313	Kind = StringToks[i].getKind();
				1314	} else {
				1315	if (Diags)
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1316	Diags->Report(StringToks[i].getLocation(),
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1317	diag::err_unsupported_string_concat);
				1318	hadError = true;
				1319	}
				1320	}
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1321	}
Chris Lattner	d42c29f	2009-02-26 23:01:51 +0000	[diff] [blame]	1322
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1323	// Include space for the null terminator.
				1324	++SizeBound;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1325
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1326	// TODO: K&R warning: "traditional C rejects string constant concatenation"
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1327
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1328	// Get the width in bytes of char/wchar_t/char16_t/char32_t
				1329	CharByteWidth = getCharWidth(Kind, Target);
				1330	assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
				1331	CharByteWidth /= 8;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1332
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1333	// The output buffer size needs to be large enough to hold wide characters.
				1334	// This is a worst-case assumption which basically corresponds to L"" "long".
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1335	SizeBound *= CharByteWidth;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1336
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1337	// Size the temporary buffer to hold the result string data.
				1338	ResultBuf.resize(SizeBound);
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1339
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1340	// Likewise, but for each string piece.
Dylan Noblesmith	2c1dd27	2012-02-05 02:13:05 +0000	[diff] [blame]	1341	SmallString<512> TokenBuf;
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1342	TokenBuf.resize(MaxTokenLength);
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1343
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1344	// Loop over all the strings, getting their spelling, and expanding them to
				1345	// wide strings as appropriate.
				1346	ResultPtr = &ResultBuf[0]; // Next byte to fill in.
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1347
Anders Carlsson	cbfc4b8	2007-10-15 02:50:23 +0000	[diff] [blame]	1348	Pascal = false;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1349
Richard Smith	e18f0fa	2012-03-05 04:02:15 +0000	[diff] [blame]	1350	SourceLocation UDSuffixTokLoc;
				1351
Craig Topper	9d5583e	2014-06-26 04:58:39 +0000	[diff] [blame]	1352	for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1353	const char *ThisTokBuf = &TokenBuf[0];
				1354	// Get the spelling of the token, which eliminates trigraphs, etc. We know
				1355	// that ThisTokBuf points to a buffer that is big enough for the whole token
				1356	// and 'spelled' tokens can only shrink.
Douglas Gregor	7bda4b8	2010-03-16 05:20:39 +0000	[diff] [blame]	1357	bool StringInvalid = false;
Chris Lattner	6bab435	2010-11-17 07:21:13 +0000	[diff] [blame]	1358	unsigned ThisTokLen =
Chris Lattner	3972011	2010-11-17 07:26:20 +0000	[diff] [blame]	1359	Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
				1360	&StringInvalid);
Argyrios Kyrtzidis	9933e3a	2012-05-03 17:50:32 +0000	[diff] [blame]	1361	if (StringInvalid)
				1362	return DiagnoseLexingError(StringToks[i].getLocation());
Douglas Gregor	7bda4b8	2010-03-16 05:20:39 +0000	[diff] [blame]	1363
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	1364	const char *ThisTokBegin = ThisTokBuf;
Richard Smith	e18f0fa	2012-03-05 04:02:15 +0000	[diff] [blame]	1365	const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
				1366
				1367	// Remove an optional ud-suffix.
				1368	if (ThisTokEnd[-1] != '"') {
				1369	const char *UDSuffixEnd = ThisTokEnd;
				1370	do {
				1371	--ThisTokEnd;
				1372	} while (ThisTokEnd[-1] != '"');
				1373
				1374	StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
				1375
				1376	if (UDSuffixBuf.empty()) {
Richard Smith	8b7258b	2014-02-17 21:52:30 +0000	[diff] [blame]	1377	if (StringToks[i].hasUCN())
				1378	expandUCNs(UDSuffixBuf, UDSuffix);
				1379	else
				1380	UDSuffixBuf.assign(UDSuffix);
Richard Smith	75b67d6	2012-03-08 01:34:56 +0000	[diff] [blame]	1381	UDSuffixToken = i;
				1382	UDSuffixOffset = ThisTokEnd - ThisTokBuf;
Richard Smith	e18f0fa	2012-03-05 04:02:15 +0000	[diff] [blame]	1383	UDSuffixTokLoc = StringToks[i].getLocation();
Richard Smith	8b7258b	2014-02-17 21:52:30 +0000	[diff] [blame]	1384	} else {
				1385	SmallString<32> ExpandedUDSuffix;
				1386	if (StringToks[i].hasUCN()) {
				1387	expandUCNs(ExpandedUDSuffix, UDSuffix);
				1388	UDSuffix = ExpandedUDSuffix;
				1389	}
				1390
Richard Smith	e18f0fa	2012-03-05 04:02:15 +0000	[diff] [blame]	1391	// C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
				1392	// result of a concatenation involving at least one user-defined-string-
				1393	// literal, all the participating user-defined-string-literals shall
				1394	// have the same ud-suffix.
David Blaikie	dcb72d7	2014-03-09 05:18:27 +0000	[diff] [blame]	1395	if (UDSuffixBuf != UDSuffix) {
Richard Smith	8b7258b	2014-02-17 21:52:30 +0000	[diff] [blame]	1396	if (Diags) {
				1397	SourceLocation TokLoc = StringToks[i].getLocation();
				1398	Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
				1399	<< UDSuffixBuf << UDSuffix
				1400	<< SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
				1401	<< SourceRange(TokLoc, TokLoc);
				1402	}
				1403	hadError = true;
Richard Smith	e18f0fa	2012-03-05 04:02:15 +0000	[diff] [blame]	1404	}
Richard Smith	e18f0fa	2012-03-05 04:02:15 +0000	[diff] [blame]	1405	}
				1406	}
				1407
				1408	// Strip the end quote.
				1409	--ThisTokEnd;
				1410
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1411	// TODO: Input character set mapping support.
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1412
Craig Topper	61147ed	2011-08-08 06:10:39 +0000	[diff] [blame]	1413	// Skip marker for wide or unicode strings.
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1414	if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
Chris Lattner	c10adde	2007-05-20 05:00:58 +0000	[diff] [blame]	1415	++ThisTokBuf;
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1416	// Skip 8 of u8 marker for utf8 strings.
				1417	if (ThisTokBuf[0] == '8')
				1418	++ThisTokBuf;
Fariborz Jahanian	abaae2b	2010-08-31 23:34:27 +0000	[diff] [blame]	1419	}
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1420
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1421	// Check for raw string
				1422	if (ThisTokBuf[0] == 'R') {
				1423	ThisTokBuf += 2; // skip R"
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1424
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1425	const char *Prefix = ThisTokBuf;
				1426	while (ThisTokBuf[0] != '(')
Anders Carlsson	cbfc4b8	2007-10-15 02:50:23 +0000	[diff] [blame]	1427	++ThisTokBuf;
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1428	++ThisTokBuf; // skip '('
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1429
Richard Smith	8129245	2012-03-08 21:59:28 +0000	[diff] [blame]	1430	// Remove same number of characters from the end
				1431	ThisTokEnd -= ThisTokBuf - Prefix;
				1432	assert(ThisTokEnd >= ThisTokBuf && "malformed raw string literal");
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1433
David Majnemer	54bbae5	2015-09-23 16:04:47 +0000	[diff] [blame]	1434	// C++14 [lex.string]p4: A source-file new-line in a raw string literal
				1435	// results in a new-line in the resulting execution string-literal.
				1436	StringRef RemainingTokenSpan(ThisTokBuf, ThisTokEnd - ThisTokBuf);
				1437	while (!RemainingTokenSpan.empty()) {
				1438	// Split the string literal on \r\n boundaries.
				1439	size_t CRLFPos = RemainingTokenSpan.find("\r\n");
				1440	StringRef BeforeCRLF = RemainingTokenSpan.substr(0, CRLFPos);
				1441	StringRef AfterCRLF = RemainingTokenSpan.substr(CRLFPos);
				1442
				1443	// Copy everything before the \r\n sequence into the string literal.
				1444	if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
				1445	hadError = true;
				1446
				1447	// Point into the \n inside the \r\n sequence and operate on the
				1448	// remaining portion of the literal.
				1449	RemainingTokenSpan = AfterCRLF.substr(1);
				1450	}
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1451	} else {
Argyrios Kyrtzidis	4e5b5c3	2012-05-03 01:01:56 +0000	[diff] [blame]	1452	if (ThisTokBuf[0] != '"') {
				1453	// The file may have come from PCH and then changed after loading the
				1454	// PCH; Fail gracefully.
Argyrios Kyrtzidis	9933e3a	2012-05-03 17:50:32 +0000	[diff] [blame]	1455	return DiagnoseLexingError(StringToks[i].getLocation());
Argyrios Kyrtzidis	4e5b5c3	2012-05-03 01:01:56 +0000	[diff] [blame]	1456	}
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1457	++ThisTokBuf; // skip "
				1458
				1459	// Check if this is a pascal string
				1460	if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
				1461	ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
				1462
				1463	// If the \p sequence is found in the first token, we have a pascal string
				1464	// Otherwise, if we already have a pascal string, ignore the first \p
				1465	if (i == 0) {
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1466	++ThisTokBuf;
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1467	Pascal = true;
				1468	} else if (Pascal)
				1469	ThisTokBuf += 2;
				1470	}
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1471
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1472	while (ThisTokBuf != ThisTokEnd) {
				1473	// Is this a span of non-escape characters?
				1474	if (ThisTokBuf[0] != '\\') {
				1475	const char *InStart = ThisTokBuf;
				1476	do {
				1477	++ThisTokBuf;
				1478	} while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
				1479
				1480	// Copy the character span over.
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1481	if (CopyStringFragment(StringToks[i], ThisTokBegin,
				1482	StringRef(InStart, ThisTokBuf - InStart)))
				1483	hadError = true;
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1484	continue;
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1485	}
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1486	// Is this a Universal Character Name escape?
				1487	if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
Richard Smith	2a70e65	2012-03-09 22:27:51 +0000	[diff] [blame]	1488	EncodeUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
				1489	ResultPtr, hadError,
				1490	FullSourceLoc(StringToks[i].getLocation(), SM),
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1491	CharByteWidth, Diags, Features);
				1492	continue;
				1493	}
				1494	// Otherwise, this is a non-UCN escape character. Process it.
				1495	unsigned ResultChar =
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1496	ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1497	FullSourceLoc(StringToks[i].getLocation(), SM),
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1498	CharByteWidth*8, Diags, Features);
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1499
Eli Friedman	d137079	2011-11-02 23:06:23 +0000	[diff] [blame]	1500	if (CharByteWidth == 4) {
				1501	// FIXME: Make the type of the result buffer correct instead of
				1502	// using reinterpret_cast.
				1503	UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultPtr);
Nico Weber	d60b72f	2011-11-14 05:17:37 +0000	[diff] [blame]	1504	*ResultWidePtr = ResultChar;
Eli Friedman	d137079	2011-11-02 23:06:23 +0000	[diff] [blame]	1505	ResultPtr += 4;
				1506	} else if (CharByteWidth == 2) {
				1507	// FIXME: Make the type of the result buffer correct instead of
				1508	// using reinterpret_cast.
				1509	UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultPtr);
Nico Weber	d60b72f	2011-11-14 05:17:37 +0000	[diff] [blame]	1510	*ResultWidePtr = ResultChar & 0xFFFF;
Eli Friedman	d137079	2011-11-02 23:06:23 +0000	[diff] [blame]	1511	ResultPtr += 2;
				1512	} else {
				1513	assert(CharByteWidth == 1 && "Unexpected char width");
				1514	*ResultPtr++ = ResultChar & 0xFF;
				1515	}
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1516	}
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1517	}
				1518	}
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1519
Chris Lattner	8a24e58	2009-01-16 18:51:42 +0000	[diff] [blame]	1520	if (Pascal) {
Eli Friedman	2055470	2011-11-05 00:41:04 +0000	[diff] [blame]	1521	if (CharByteWidth == 4) {
				1522	// FIXME: Make the type of the result buffer correct instead of
				1523	// using reinterpret_cast.
				1524	UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultBuf.data());
				1525	ResultWidePtr[0] = GetNumStringChars() - 1;
				1526	} else if (CharByteWidth == 2) {
				1527	// FIXME: Make the type of the result buffer correct instead of
				1528	// using reinterpret_cast.
				1529	UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultBuf.data());
				1530	ResultWidePtr[0] = GetNumStringChars() - 1;
				1531	} else {
				1532	assert(CharByteWidth == 1 && "Unexpected char width");
				1533	ResultBuf[0] = GetNumStringChars() - 1;
				1534	}
Chris Lattner	8a24e58	2009-01-16 18:51:42 +0000	[diff] [blame]	1535
				1536	// Verify that pascal strings aren't too large.
Chris Lattner	6bab435	2010-11-17 07:21:13 +0000	[diff] [blame]	1537	if (GetStringLength() > 256) {
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1538	if (Diags)
Craig Topper	9d5583e	2014-06-26 04:58:39 +0000	[diff] [blame]	1539	Diags->Report(StringToks.front().getLocation(),
Chris Lattner	6bab435	2010-11-17 07:21:13 +0000	[diff] [blame]	1540	diag::err_pascal_string_too_long)
Craig Topper	9d5583e	2014-06-26 04:58:39 +0000	[diff] [blame]	1541	<< SourceRange(StringToks.front().getLocation(),
				1542	StringToks.back().getLocation());
Douglas Gregor	fb65e59	2011-07-27 05:40:30 +0000	[diff] [blame]	1543	hadError = true;
Eli Friedman	1c3fb22	2009-04-01 03:17:08 +0000	[diff] [blame]	1544	return;
				1545	}
Chris Lattner	6bab435	2010-11-17 07:21:13 +0000	[diff] [blame]	1546	} else if (Diags) {
Douglas Gregor	b37b46e	2010-07-20 14:33:20 +0000	[diff] [blame]	1547	// Complain if this string literal has too many characters.
Chris Lattner	2be8aa9	2010-11-17 07:12:42 +0000	[diff] [blame]	1548	unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
Benjamin Kramer	f23a6e6	2012-11-08 19:22:26 +0000	[diff] [blame]	1549
Douglas Gregor	b37b46e	2010-07-20 14:33:20 +0000	[diff] [blame]	1550	if (GetNumStringChars() > MaxChars)
Craig Topper	9d5583e	2014-06-26 04:58:39 +0000	[diff] [blame]	1551	Diags->Report(StringToks.front().getLocation(),
Chris Lattner	6bab435	2010-11-17 07:21:13 +0000	[diff] [blame]	1552	diag::ext_string_too_long)
Douglas Gregor	b37b46e	2010-07-20 14:33:20 +0000	[diff] [blame]	1553	<< GetNumStringChars() << MaxChars
Chris Lattner	2be8aa9	2010-11-17 07:12:42 +0000	[diff] [blame]	1554	<< (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
Craig Topper	9d5583e	2014-06-26 04:58:39 +0000	[diff] [blame]	1555	<< SourceRange(StringToks.front().getLocation(),
				1556	StringToks.back().getLocation());
Chris Lattner	8a24e58	2009-01-16 18:51:42 +0000	[diff] [blame]	1557	}
Steve Naroff	4f88b31	2007-03-13 22:37:02 +0000	[diff] [blame]	1558	}
Chris Lattner	ddb7191	2009-02-18 19:21:10 +0000	[diff] [blame]	1559
Benjamin Kramer	f23a6e6	2012-11-08 19:22:26 +0000	[diff] [blame]	1560	static const char resyncUTF8(const char Err, const char *End) {
				1561	if (Err == End)
				1562	return End;
				1563	End = Err + std::min<unsigned>(getNumBytesForUTF8(*Err), End-Err);
				1564	while (++Err != End && (*Err & 0xC0) == 0x80)
				1565	;
				1566	return Err;
Seth Cantrell	4cfc817	2012-10-28 18:24:46 +0000	[diff] [blame]	1567	}
				1568
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1569	/// \brief This function copies from Fragment, which is a sequence of bytes
				1570	/// within Tok's contents (which begin at TokBegin) into ResultPtr.
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1571	/// Performs widening for multi-byte characters.
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1572	bool StringLiteralParser::CopyStringFragment(const Token &Tok,
				1573	const char *TokBegin,
				1574	StringRef Fragment) {
				1575	const UTF8 *ErrorPtrTmp;
				1576	if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
				1577	return false;
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1578
Eli Friedman	9436352	2012-02-11 05:08:10 +0000	[diff] [blame]	1579	// If we see bad encoding for unprefixed string literals, warn and
				1580	// simply copy the byte values, for compatibility with gcc and older
				1581	// versions of clang.
				1582	bool NoErrorOnBadEncoding = isAscii();
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1583	if (NoErrorOnBadEncoding) {
				1584	memcpy(ResultPtr, Fragment.data(), Fragment.size());
				1585	ResultPtr += Fragment.size();
				1586	}
Seth Cantrell	4cfc817	2012-10-28 18:24:46 +0000	[diff] [blame]	1587
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1588	if (Diags) {
Seth Cantrell	4cfc817	2012-10-28 18:24:46 +0000	[diff] [blame]	1589	const char ErrorPtr = reinterpret_cast<const char >(ErrorPtrTmp);
				1590
				1591	FullSourceLoc SourceLoc(Tok.getLocation(), SM);
				1592	const DiagnosticBuilder &Builder =
				1593	Diag(Diags, Features, SourceLoc, TokBegin,
Benjamin Kramer	f23a6e6	2012-11-08 19:22:26 +0000	[diff] [blame]	1594	ErrorPtr, resyncUTF8(ErrorPtr, Fragment.end()),
Seth Cantrell	4cfc817	2012-10-28 18:24:46 +0000	[diff] [blame]	1595	NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
				1596	: diag::err_bad_string_encoding);
				1597
Benjamin Kramer	f23a6e6	2012-11-08 19:22:26 +0000	[diff] [blame]	1598	const char *NextStart = resyncUTF8(ErrorPtr, Fragment.end());
Seth Cantrell	4cfc817	2012-10-28 18:24:46 +0000	[diff] [blame]	1599	StringRef NextFragment(NextStart, Fragment.end()-NextStart);
				1600
Benjamin Kramer	7d574e2	2012-11-08 19:22:31 +0000	[diff] [blame]	1601	// Decode into a dummy buffer.
				1602	SmallString<512> Dummy;
				1603	Dummy.reserve(Fragment.size() * CharByteWidth);
				1604	char *Ptr = Dummy.data();
				1605
Alexander Kornienko	d3b4e08	2014-05-22 19:56:11 +0000	[diff] [blame]	1606	while (!ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
Seth Cantrell	4cfc817	2012-10-28 18:24:46 +0000	[diff] [blame]	1607	const char ErrorPtr = reinterpret_cast<const char >(ErrorPtrTmp);
Benjamin Kramer	f23a6e6	2012-11-08 19:22:26 +0000	[diff] [blame]	1608	NextStart = resyncUTF8(ErrorPtr, Fragment.end());
Seth Cantrell	4cfc817	2012-10-28 18:24:46 +0000	[diff] [blame]	1609	Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin,
				1610	ErrorPtr, NextStart);
				1611	NextFragment = StringRef(NextStart, Fragment.end()-NextStart);
				1612	}
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1613	}
Eli Friedman	9436352	2012-02-11 05:08:10 +0000	[diff] [blame]	1614	return !NoErrorOnBadEncoding;
				1615	}
Craig Topper	54edcca	2011-08-11 04:06:15 +0000	[diff] [blame]	1616
Argyrios Kyrtzidis	9933e3a	2012-05-03 17:50:32 +0000	[diff] [blame]	1617	void StringLiteralParser::DiagnoseLexingError(SourceLocation Loc) {
				1618	hadError = true;
				1619	if (Diags)
				1620	Diags->Report(Loc, diag::err_lexing_string);
				1621	}
				1622
Chris Lattner	ddb7191	2009-02-18 19:21:10 +0000	[diff] [blame]	1623	/// getOffsetOfStringByte - This function returns the offset of the
				1624	/// specified byte of the string data represented by Token. This handles
				1625	/// advancing over escape sequences in the string.
				1626	unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
Chris Lattner	bde1b81	2010-11-17 06:46:14 +0000	[diff] [blame]	1627	unsigned ByteNo) const {
Chris Lattner	ddb7191	2009-02-18 19:21:10 +0000	[diff] [blame]	1628	// Get the spelling of the token.
Dylan Noblesmith	2c1dd27	2012-02-05 02:13:05 +0000	[diff] [blame]	1629	SmallString<32> SpellingBuffer;
Alexis Hunt	3b79186	2010-08-30 17:47:05 +0000	[diff] [blame]	1630	SpellingBuffer.resize(Tok.getLength());
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1631
Douglas Gregor	7bda4b8	2010-03-16 05:20:39 +0000	[diff] [blame]	1632	bool StringInvalid = false;
Chris Lattner	ddb7191	2009-02-18 19:21:10 +0000	[diff] [blame]	1633	const char *SpellingPtr = &SpellingBuffer[0];
Chris Lattner	3972011	2010-11-17 07:26:20 +0000	[diff] [blame]	1634	unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features,
				1635	&StringInvalid);
Chris Lattner	7a02bfd	2010-11-17 06:26:08 +0000	[diff] [blame]	1636	if (StringInvalid)
Douglas Gregor	7bda4b8	2010-03-16 05:20:39 +0000	[diff] [blame]	1637	return 0;
Chris Lattner	ddb7191	2009-02-18 19:21:10 +0000	[diff] [blame]	1638
Chris Lattner	ddb7191	2009-02-18 19:21:10 +0000	[diff] [blame]	1639	const char *SpellingStart = SpellingPtr;
				1640	const char *SpellingEnd = SpellingPtr+TokLen;
				1641
Richard Smith	4060f77	2012-06-13 05:37:23 +0000	[diff] [blame]	1642	// Handle UTF-8 strings just like narrow strings.
				1643	if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')
				1644	SpellingPtr += 2;
				1645
				1646	assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
				1647	SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
				1648
				1649	// For raw string literals, this is easy.
				1650	if (SpellingPtr[0] == 'R') {
				1651	assert(SpellingPtr[1] == '"' && "Should be a raw string literal!");
				1652	// Skip 'R"'.
				1653	SpellingPtr += 2;
				1654	while (*SpellingPtr != '(') {
				1655	++SpellingPtr;
				1656	assert(SpellingPtr < SpellingEnd && "Missing ( for raw string literal");
				1657	}
				1658	// Skip '('.
				1659	++SpellingPtr;
				1660	return SpellingPtr - SpellingStart + ByteNo;
				1661	}
				1662
				1663	// Skip over the leading quote
Chris Lattner	ddb7191	2009-02-18 19:21:10 +0000	[diff] [blame]	1664	assert(SpellingPtr[0] == '"' && "Should be a string literal!");
				1665	++SpellingPtr;
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1666
Chris Lattner	ddb7191	2009-02-18 19:21:10 +0000	[diff] [blame]	1667	// Skip over bytes until we find the offset we're looking for.
				1668	while (ByteNo) {
				1669	assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1670
Chris Lattner	ddb7191	2009-02-18 19:21:10 +0000	[diff] [blame]	1671	// Step over non-escapes simply.
				1672	if (*SpellingPtr != '\\') {
				1673	++SpellingPtr;
				1674	--ByteNo;
				1675	continue;
				1676	}
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1677
Chris Lattner	ddb7191	2009-02-18 19:21:10 +0000	[diff] [blame]	1678	// Otherwise, this is an escape character. Advance over it.
				1679	bool HadError = false;
Richard Smith	4060f77	2012-06-13 05:37:23 +0000	[diff] [blame]	1680	if (SpellingPtr[1] == 'u' \|\| SpellingPtr[1] == 'U') {
				1681	const char *EscapePtr = SpellingPtr;
				1682	unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, SpellingEnd,
				1683	1, Features, HadError);
				1684	if (Len > ByteNo) {
				1685	// ByteNo is somewhere within the escape sequence.
				1686	SpellingPtr = EscapePtr;
				1687	break;
				1688	}
				1689	ByteNo -= Len;
				1690	} else {
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1691	ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
Richard Smith	4060f77	2012-06-13 05:37:23 +0000	[diff] [blame]	1692	FullSourceLoc(Tok.getLocation(), SM),
Richard Smith	639b8d0	2012-09-08 07:16:20 +0000	[diff] [blame]	1693	CharByteWidth*8, Diags, Features);
Richard Smith	4060f77	2012-06-13 05:37:23 +0000	[diff] [blame]	1694	--ByteNo;
				1695	}
Chris Lattner	ddb7191	2009-02-18 19:21:10 +0000	[diff] [blame]	1696	assert(!HadError && "This method isn't valid on erroneous strings");
Chris Lattner	ddb7191	2009-02-18 19:21:10 +0000	[diff] [blame]	1697	}
Mike Stump	11289f4	2009-09-09 15:08:12 +0000	[diff] [blame]	1698
Chris Lattner	ddb7191	2009-02-18 19:21:10 +0000	[diff] [blame]	1699	return SpellingPtr-SpellingStart;
				1700	}