Lex/LiteralSupport.cpp - fp2-dev/platform/external/clang - Gitiles

 //===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file was developed by Steve Naroff and is distributed under
 // the University of Illinois Open Source License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements the NumericLiteralParser, CharLiteralParser, and
 // StringLiteralParser interfaces.
 //
 //===----------------------------------------------------------------------===//

 #include "clang/Lex/LiteralSupport.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/StringExtras.h"
 using namespace clang;

 /// HexDigitValue - Translate a single hexadecimal digit character into its
 /// numeric value (0-15).  Both lower and upper case letters are accepted; any
 /// character that is not a hex digit produces -1.
 static int HexDigitValue(char C) {
   const bool IsDecimalDigit = ('0' <= C) && (C <= '9');
   const bool IsLowerHex     = ('a' <= C) && (C <= 'f');
   const bool IsUpperHex     = ('A' <= C) && (C <= 'F');

   if (IsDecimalDigit)
     return C - '0';
   if (IsLowerHex)
     return 10 + (C - 'a');
   if (IsUpperHex)
     return 10 + (C - 'A');

   // Not a hexadecimal digit.
   return -1;
 }

 /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
 /// either a character or a string literal.
 ///
 /// On entry ThisTokBuf points at the backslash; on return it points just past
 /// the last character of the escape.  ThisTokEnd bounds how far hex and octal
 /// escapes may scan.  HadError is set (never cleared) when a "\x" with no hex
 /// digits is diagnosed.  Loc is the location attached to every diagnostic, and
 /// IsWide selects the wchar_t width instead of the char width when checking
 /// whether a hex or octal value has to be truncated.  The return value is the
 /// value of the escaped character.
 static unsigned ProcessCharEscape(const char *&ThisTokBuf,
                                   const char *ThisTokEnd, bool &HadError,
                                   SourceLocation Loc, bool IsWide,
                                   Preprocessor &PP) {
   // Skip the '\' char.
   ++ThisTokBuf;

   // We know that this character can't be off the end of the buffer, because
   // that would have been \", which would not have been the end of string.
   unsigned ResultChar = *ThisTokBuf++;
   switch (ResultChar) {
   // These map to themselves.
   case '\\': case '\'': case '"': case '?': break;

     // These have fixed mappings.
   case 'a':
     // TODO: K&R: the meaning of '\\a' is different in traditional C
     ResultChar = 7;
     break;
   case 'b':
     ResultChar = 8;
     break;
   case 'e':
     // '\e' (escape) is not standard C; accept it but say so.
     PP.Diag(Loc, diag::ext_nonstandard_escape, "e");
     ResultChar = 27;
     break;
   case 'f':
     ResultChar = 12;
     break;
   case 'n':
     ResultChar = 10;
     break;
   case 'r':
     ResultChar = 13;
     break;
   case 't':
     ResultChar = 9;
     break;
   case 'v':
     ResultChar = 11;
     break;

     //case 'u': case 'U':  // FIXME: UCNs.
   case 'x': { // Hex escape.
     ResultChar = 0;
     if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
       PP.Diag(Loc, diag::err_hex_escape_no_digits);
       HadError = true;
       break;
     }

     // Hex escapes are a maximal series of hex digits.
     bool Overflow = false;
     for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
       int CharVal = HexDigitValue(ThisTokBuf[0]);
       if (CharVal == -1) break;
       // If the top nibble is already occupied, the shift below loses a digit.
       Overflow |= (ResultChar & 0xF0000000) ? true : false;
       ResultChar <<= 4;
       ResultChar |= CharVal;
     }

     // See if any bits will be truncated when evaluated as a character.
     unsigned CharWidth = IsWide ? PP.getTargetInfo().getWCharWidth(Loc)
                                 : PP.getTargetInfo().getCharWidth(Loc);
     if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
       Overflow = true;
       // Keep only the low CharWidth bits.
       ResultChar &= ~0U >> (32-CharWidth);
     }

     // Check for overflow.
     if (Overflow)   // Too many digits to fit in
       PP.Diag(Loc, diag::warn_hex_escape_too_large);
     break;
   }
   case '0': case '1': case '2': case '3':
   case '4': case '5': case '6': case '7': {
     // Octal escapes.  Back up so the loop below sees the first digit too.
     --ThisTokBuf;
     ResultChar = 0;

     // Octal escapes are a series of octal digits with maximum length 3.
     // "\0123" is a two digit sequence equal to "\012" "3".
     unsigned NumDigits = 0;
     do {
       ResultChar <<= 3;
       ResultChar |= *ThisTokBuf++ - '0';
       ++NumDigits;
     } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
              ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');

     // Check for overflow.  Reject '\777', but not L'\777'.
     unsigned CharWidth = IsWide ? PP.getTargetInfo().getWCharWidth(Loc)
                                 : PP.getTargetInfo().getCharWidth(Loc);
     if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
       PP.Diag(Loc, diag::warn_octal_escape_too_large);
       ResultChar &= ~0U >> (32-CharWidth);
     }
     break;
   }

     // Otherwise, these are not valid escapes.
   case '(': case '{': case '[': case '%':
     // GCC accepts these as extensions.  We warn about them as such though.
     if (!PP.getLangOptions().NoExtensions) {
       PP.Diag(Loc, diag::ext_nonstandard_escape,
               std::string()+(char)ResultChar);
       break;
     }
     // FALL THROUGH.
   default:
     // Show the escaped character itself when it is printable, otherwise show
     // its value in hex.  The decision has to be made on the escaped character
     // (ResultChar), not on whatever character follows it in the token.  The
     // range check keeps the argument within what isgraph() accepts.
     if (ResultChar <= 0xFF && isgraph((int)ResultChar)) {
       PP.Diag(Loc, diag::ext_unknown_escape, std::string()+(char)ResultChar);
     } else {
       PP.Diag(Loc, diag::ext_unknown_escape, "x"+llvm::utohexstr(ResultChar));
     }
     break;
   }

   return ResultChar;
 }


 ///       integer-constant: [C99 6.4.4.1]
 ///         decimal-constant integer-suffix
 ///         octal-constant integer-suffix
 ///         hexadecimal-constant integer-suffix
 ///       decimal-constant:
 ///         nonzero-digit
 ///         decimal-constant digit
 ///       octal-constant:
 ///         0
 ///         octal-constant octal-digit
 ///       hexadecimal-constant:
 ///         hexadecimal-prefix hexadecimal-digit
 ///         hexadecimal-constant hexadecimal-digit
 ///       hexadecimal-prefix: one of
 ///         0x 0X
 ///       integer-suffix:
 ///         unsigned-suffix [long-suffix]
 ///         unsigned-suffix [long-long-suffix]
 ///         long-suffix [unsigned-suffix]
 ///         long-long-suffix [unsigned-suffix]
 ///       nonzero-digit:
 ///         1 2 3 4 5 6 7 8 9
 ///       octal-digit:
 ///         0 1 2 3 4 5 6 7
 ///       hexadecimal-digit:
 ///         0 1 2 3 4 5 6 7 8 9
 ///         a b c d e f
 ///         A B C D E F
 ///       unsigned-suffix: one of
 ///         u U
 ///       long-suffix: one of
 ///         l L
 ///       long-long-suffix: one of
 ///         ll LL
 ///
 ///       floating-constant: [C99 6.4.4.2]
 ///         TODO: add rules...
 ///

 /// NumericLiteralParser - Classify the numeric literal spelled in
 /// [begin, end): determine its radix, record where the digits and the suffix
 /// start, note whether it has a period and/or an exponent, and decode the
 /// suffix flags (unsigned, long, long long, float, imaginary).  Malformed
 /// literals are diagnosed relative to TokLoc and leave hadError set.
 ///
 /// NOTE(review): several checks below read *s after s may have reached
 /// ThisTokEnd, so this presumably relies on the byte at *end being readable
 /// and not a character that could continue the literal - confirm against the
 /// lexer's contract.
 NumericLiteralParser::
 NumericLiteralParser(const char *begin, const char *end,
                      SourceLocation TokLoc, Preprocessor &pp)
   : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {
   s = DigitsBegin = begin;
   saw_exponent = false;
   saw_period = false;
   isLong = false;
   isUnsigned = false;
   isLongLong = false;
   isFloat = false;
   isImaginary = false;
   hadError = false;

   if (*s == '0') { // parse radix
     s++;
     if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
       s++;
       radix = 16;
       DigitsBegin = s;
       s = SkipHexDigits(s);
       if (s == ThisTokEnd) {
         // Done.
       } else if (*s == '.') {
         s++;
         saw_period = true;
         s = SkipHexDigits(s);
       }
       // A binary exponent can appear with or without a '.'. If dotted, the
       // binary exponent is required.
       if (*s == 'p' || *s == 'P') {
         s++;
         saw_exponent = true;
         if (*s == '+' || *s == '-')  s++; // sign
         const char *first_non_digit = SkipDigits(s);
         if (first_non_digit == s) {
           Diag(TokLoc, diag::err_exponent_has_no_digits);
           return;
         } else {
           s = first_non_digit;
         }
       } else if (saw_period) {
         Diag(TokLoc, diag::err_hexconstant_requires_exponent);
         return;
       }
     } else if (*s == 'b' || *s == 'B') {
       // 0b101010 is a GCC extension.
       ++s;
       radix = 2;
       DigitsBegin = s;
       s = SkipBinaryDigits(s);
       if (s == ThisTokEnd) {
         // Done.
       } else if (isxdigit(*s)) {
         // Point the diagnostic at the offending digit, as the octal case does.
         Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
              diag::err_invalid_binary_digit, std::string(s, s+1));
         return;
       }
       PP.Diag(TokLoc, diag::ext_binary_literal);
     } else {
       // For now, the radix is set to 8. If we discover that we have a
       // floating point constant, the radix will change to 10. Octal floating
       // point constants are not permitted (only decimal and hexadecimal).
       radix = 8;
       DigitsBegin = s;
       s = SkipOctalDigits(s);
       if (s == ThisTokEnd) {
         // Done.
       } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
         // 'e'/'E' is a hex digit as far as isxdigit is concerned, but here it
         // introduces an exponent (e.g. "0e5"), so it must not be rejected.
         TokLoc = PP.AdvanceToTokenCharacter(TokLoc, s-begin);
         Diag(TokLoc, diag::err_invalid_octal_digit, std::string(s, s+1));
         return;
       } else if (*s == '.') {
         s++;
         radix = 10;
         saw_period = true;
         s = SkipDigits(s);
       }
       if (*s == 'e' || *s == 'E') { // exponent
         s++;
         radix = 10;
         saw_exponent = true;
         if (*s == '+' || *s == '-')  s++; // sign
         const char *first_non_digit = SkipDigits(s);
         if (first_non_digit == s) {
           Diag(TokLoc, diag::err_exponent_has_no_digits);
           return;
         } else {
           s = first_non_digit;
         }
       }
     }
   } else { // the first digit is non-zero
     radix = 10;
     s = SkipDigits(s);
     if (s == ThisTokEnd) {
       // Done.
     } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
       // A hex letter other than the exponent marker cannot appear in a
       // decimal constant.  'e'/'E' is left for the exponent code below so
       // that literals such as "1e5" are accepted.
       Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
            diag::err_invalid_decimal_digit, std::string(s, s+1));
       return;
     } else if (*s == '.') {
       s++;
       saw_period = true;
       s = SkipDigits(s);
     }
     if (*s == 'e' || *s == 'E') { // exponent
       s++;
       saw_exponent = true;
       if (*s == '+' || *s == '-')  s++; // sign
       const char *first_non_digit = SkipDigits(s);
       if (first_non_digit == s) {
         Diag(TokLoc, diag::err_exponent_has_no_digits);
         return;
       } else {
         s = first_non_digit;
       }
     }
   }

   SuffixBegin = s;

   // Parse the suffix.  At this point we can classify whether we have an FP or
   // integer constant.
   bool isFPConstant = isFloatingLiteral();

   // Loop over all of the characters of the suffix.  If we see something bad,
   // we break out of the loop.  Inside the switch, 'continue' means the
   // character was accepted and 'break' means it was rejected.
   for (; s != ThisTokEnd; ++s) {
     switch (*s) {
     case 'f':      // FP Suffix for "float"
     case 'F':
       if (!isFPConstant) break;  // Error for integer constant.
       if (isFloat || isLong) break; // FF, LF invalid.
       isFloat = true;
       continue;  // Success.
     case 'u':
     case 'U':
       if (isFPConstant) break;  // Error for floating constant.
       if (isUnsigned) break;    // Cannot be repeated.
       isUnsigned = true;
       continue;  // Success.
     case 'l':
     case 'L':
       if (isLong || isLongLong) break;  // Cannot be repeated.
       if (isFloat) break;               // LF invalid.

       // Check for long long.  The L's need to be adjacent and the same case.
       if (s+1 != ThisTokEnd && s[1] == s[0]) {
         if (isFPConstant) break;        // long long invalid for floats.
         isLongLong = true;
         ++s;  // Eat both of them.
       } else {
         isLong = true;
       }
       continue;  // Success.
     case 'i':
     case 'I':
     case 'j':
     case 'J':
       if (isImaginary) break;   // Cannot be repeated.
       PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
               diag::ext_imaginary_constant);
       isImaginary = true;
       continue;  // Success.
     }
     // If we reached here, there was an error.
     break;
   }

   // Report an error if there are any.  s stopped on the first suffix
   // character that was not accepted.
   if (s != ThisTokEnd) {
     TokLoc = PP.AdvanceToTokenCharacter(TokLoc, s-begin);
     Diag(TokLoc, isFPConstant ? diag::err_invalid_suffix_float_constant :
                                 diag::err_invalid_suffix_integer_constant,
          std::string(SuffixBegin, ThisTokEnd));
     return;
   }
 }

 /// GetIntegerValue - Accumulate the digits between DigitsBegin and
 /// SuffixBegin, in the literal's radix, into Val (keeping Val's bit width).
 /// Returns true if the value did not fit, in which case Val holds the low
 /// bits of the result; returns false otherwise.
 bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
   s = DigitsBegin;
   Val = 0;

   // Radix and current digit, widened to the same width as the result so the
   // arithmetic below is all done at Val's width.
   llvm::APInt Radix(Val.getBitWidth(), radix);
   llvm::APInt Digit(Val.getBitWidth(), 0);

   bool Overflowed = false;
   for (; s < SuffixBegin; ) {
     unsigned DigitValue = HexDigitValue(*s++);

     // The constructor is expected to have rejected digits that do not belong
     // to this radix.
     assert(DigitValue < radix && "NumericLiteralParser ctor should have rejected this");
     Digit = DigitValue;

     // Scale the running value by the radix.  If dividing back does not give
     // the previous value, bits were lost in the multiply.
     llvm::APInt BeforeMultiply = Val;
     Val *= Radix;
     if (Val.udiv(Radix) != BeforeMultiply)
       Overflowed = true;

     // Add in the digit.  A wrapped sum is smaller than either operand.
     llvm::APInt BeforeAdd = Val;
     Val += Digit;
     if (Val.ult(BeforeAdd))
       Overflowed = true;
     if (Val.ult(Digit))
       Overflowed = true;
   }
   return Overflowed;
 }

 // GetFloatValue - Poor man's floatvalue (FIXME).
 //
 // Converts the token text [ThisTokBegin, ThisTokEnd) with strtod and narrows
 // the result to float.  strtod needs a NUL-terminated string, so the token is
 // copied first; the copy is sized to the token, so literals of any length are
 // handled without overrunning a fixed-size buffer.
 float NumericLiteralParser::GetFloatValue() {
   const std::string TokenText(ThisTokBegin, ThisTokEnd);
   return (float)strtod(TokenText.c_str(), 0);
 }

 /// Diag - Forward the diagnostic DiagID, with argument string M, to the
 /// preprocessor at location Loc, and record that this literal had an error.
 void NumericLiteralParser::Diag(SourceLocation Loc,
                                 unsigned DiagID,
                                 const std::string &M) {
   // Emit first, then latch the error flag.
   PP.Diag(Loc, DiagID, M);
   hadError = true;
 }


 /// CharLiteralParser - Compute the value of the character constant spelled
 /// at 'begin'.  Escape sequences are decoded, multi-character constants are
 /// folded into a single value, and HadError is set if an escape sequence is
 /// diagnosed as an error.
 CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
                                      SourceLocation Loc, Preprocessor &PP) {
   // At this point we know that the character matches the regex "L?'.*'".
   HadError = false;
   Value = 0;

   // A leading 'L' marks a wide character constant; step over it.
   IsWide = (*begin == 'L');
   if (IsWide)
     ++begin;

   // Step over the opening quote.
   assert(begin[0] == '\'' && "Invalid token lexed");
   ++begin;

   // FIXME: This assumes that 'int' is 32-bits in overflow calculation, and the
   // size of "value".
   assert(PP.getTargetInfo().getIntWidth(Loc) == 32 &&
          "Assumes sizeof(int) == 4 for now");
   // FIXME: This assumes that wchar_t is 32-bits for now.
   assert(PP.getTargetInfo().getWCharWidth(Loc) == 32 &&
          "Assumes sizeof(wchar_t) == 4 for now");
   // FIXME: This extensively assumes that 'char' is 8-bits.
   assert(PP.getTargetInfo().getCharWidth(Loc) == 8 &&
          "Assumes char is 8 bits");

   // Number of characters folded into Value so far.
   unsigned NumCharsSeen = 0;
   for (; *begin != '\''; ++NumCharsSeen) {
     // Decode the next character: either an escape sequence or a plain char.
     unsigned ThisChar;
     if (*begin == '\\')
       ThisChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP);
     else
       ThisChar = *begin++;

     // Anything after the first character makes this a multi-character
     // constant (e.g. 'abc'), whose value is implementation defined
     // (C99 6.4.4.4p10).
     if (NumCharsSeen != 0) {
       if (IsWide) {
         // Warn once, when the second character shows up, that the earlier
         // characters of a wide constant (L'abcd') are being discarded.
         if (NumCharsSeen == 1)
           PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);

         // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
         Value = 0;
       } else {
         // Narrow constants behave as if the characters were concatenated, so
         // make room for the next one, warning if the top bits fall off.
         if (((Value << 8) >> 8) != Value)
           PP.Diag(Loc, diag::warn_char_constant_too_large);
         Value <<= 8;
       }
     }

     Value += ThisChar;
   }

   const bool SawMultipleChars = NumCharsSeen > 1;

   // A single narrow character is sign extended (e.g. '\xFF' is "-1") when
   // 'char' is signed on this target (C99 6.4.4.4p10).  Multi-character
   // constants are not sign extended in this implementation:
   // '\xFF\xFF' = 65535 and '\x0\xFF' = 255, which matches GCC.
   if (!IsWide && !SawMultipleChars && (Value & 128) &&
       PP.getTargetInfo().isCharSigned(Loc))
     Value = (signed char)Value;
 }


 ///       string-literal: [C99 6.4.5]
 ///          " [s-char-sequence] "
 ///         L" [s-char-sequence] "
 ///       s-char-sequence:
 ///         s-char
 ///         s-char-sequence s-char
 ///       s-char:
 ///         any source character except the double quote ",
 ///           backslash \, or newline character
 ///         escape-character
 ///         universal-character-name
 ///       escape-character: [C99 6.4.4.4]
 ///         \ escape-code
 ///         universal-character-name
 ///       escape-code:
 ///         character-escape-code
 ///         octal-escape-code
 ///         hex-escape-code
 ///       character-escape-code: one of
 ///         n t b r f v a
 ///         \ ' " ?
 ///       octal-escape-code:
 ///         octal-digit
 ///         octal-digit octal-digit
 ///         octal-digit octal-digit octal-digit
 ///       hex-escape-code:
 ///         x hex-digit
 ///         hex-escape-code hex-digit
 ///       universal-character-name:
 ///         \u hex-quad
 ///         \U hex-quad hex-quad
 ///       hex-quad:
 ///         hex-digit hex-digit hex-digit hex-digit
 ///
 /// StringLiteralParser - Concatenate the adjacent string literal tokens
 /// StringToks[0 .. NumStringToks) into ResultBuf (translation phase 6),
 /// decoding escape sequences along the way.  If any piece is a wide string
 /// the whole result is wide, and every character is stored as
 /// wchar_tByteWidth little-endian bytes.  hadError is set when an escape
 /// sequence is diagnosed as an error.  At least one token is required.
 StringLiteralParser::
 StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
                     Preprocessor &pp, TargetInfo &t)
   : PP(pp), Target(t) {
   // Scan all of the string portions, remember the max individual token length,
   // computing a bound on the concatenated string length, and see whether any
   // piece is a wide-string.  If any of the string portions is a wide-string
   // literal, the result is a wide-string literal [C99 6.4.5p4].
   MaxTokenLength = StringToks[0].getLength();
   SizeBound = StringToks[0].getLength()-2;  // -2 for "".
   AnyWide = StringToks[0].getKind() == tok::wide_string_literal;

   hadError = false;

   // Implement Translation Phase #6: concatenation of string literals
   /// (C99 5.1.1.2p1).  The common case is only one string fragment.
   for (unsigned i = 1; i != NumStringToks; ++i) {
     // The string could be shorter than this if it needs cleaning, but this is a
     // reasonable bound, which is all we need.
     SizeBound += StringToks[i].getLength()-2;  // -2 for "".

     // Remember maximum string piece length.
     if (StringToks[i].getLength() > MaxTokenLength)
       MaxTokenLength = StringToks[i].getLength();

     // Remember if we see any wide strings.
     AnyWide |= StringToks[i].getKind() == tok::wide_string_literal;
   }


   // Include space for the null terminator.
   ++SizeBound;

   // TODO: K&R warning: "traditional C rejects string constant concatenation"

   // Get the width in bytes of wchar_t.  If no wchar_t strings are used, do not
   // query the target.  As such, wchar_tByteWidth is only valid if AnyWide=true.
   wchar_tByteWidth = ~0U;
   if (AnyWide) {
     wchar_tByteWidth = Target.getWCharWidth(StringToks[0].getLocation());
     assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
     wchar_tByteWidth /= 8;
   }

   // The output buffer size needs to be large enough to hold wide characters.
   // This is a worst-case assumption which basically corresponds to L"" "long".
   if (AnyWide)
     SizeBound *= wchar_tByteWidth;

   // Size the temporary buffer to hold the result string data.
   ResultBuf.resize(SizeBound);

   // Likewise, but for each string piece.
   llvm::SmallString<512> TokenBuf;
   TokenBuf.resize(MaxTokenLength);

   // Loop over all the strings, getting their spelling, and expanding them to
   // wide strings as appropriate.
   ResultPtr = &ResultBuf[0];   // Next byte to fill in.

   for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
     const char *ThisTokBuf = &TokenBuf[0];
     // Get the spelling of the token, which eliminates trigraphs, etc.  We know
     // that ThisTokBuf points to a buffer that is big enough for the whole token
     // and 'spelled' tokens can only shrink.
     unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
     const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.

     // TODO: Input character set mapping support.

     // Skip L marker for wide strings.
     bool ThisIsWide = false;
     if (ThisTokBuf[0] == 'L') {
       ++ThisTokBuf;
       ThisIsWide = true;
     }

     assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
     ++ThisTokBuf;

     while (ThisTokBuf != ThisTokEnd) {
       // Is this a span of non-escape characters?
       if (ThisTokBuf[0] != '\\') {
         const char *InStart = ThisTokBuf;
         do {
           ++ThisTokBuf;
         } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');

         // Copy the character span over.
         unsigned Len = ThisTokBuf-InStart;
         if (!AnyWide) {
           memcpy(ResultPtr, InStart, Len);
           ResultPtr += Len;
         } else {
           // Note: our internal rep of wide char tokens is always little-endian.
           for (; Len; --Len, ++InStart) {
             *ResultPtr++ = InStart[0];
             // Pad the character out to a full wchar_t with zero bytes.
             for (unsigned Byte = 1; Byte != wchar_tByteWidth; ++Byte)
               *ResultPtr++ = 0;
           }
         }
         continue;
       }

       // Otherwise, this is an escape character.  Process it.
       unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
                                               StringToks[i].getLocation(),
                                               ThisIsWide, PP);

       // Note: our internal rep of wide char tokens is always little-endian.
       *ResultPtr++ = ResultChar & 0xFF;

       if (AnyWide) {
         // Emit the remaining bytes of the wide character, low byte first.
         for (unsigned Byte = 1; Byte != wchar_tByteWidth; ++Byte)
           *ResultPtr++ = ResultChar >> Byte*8;
       }
     }
   }

   // Add the zero terminator: one byte for narrow strings, a full wchar_t for
   // wide ones.  Every terminator byte is stored explicitly; SizeBound reserved
   // room for it above.
   const unsigned TerminatorBytes = AnyWide ? wchar_tByteWidth : 1;
   for (unsigned Byte = 0; Byte != TerminatorBytes; ++Byte)
     ResultPtr[Byte] = 0;

   // For narrow strings ResultPtr stays on the terminator.  For wide strings it
   // ends wchar_tByteWidth-1 bytes past the start of the terminator.
   // NOTE(review): that wide-string end position is kept deliberately because
   // the readers of ResultPtr are not visible here - confirm before changing.
   if (AnyWide)
     ResultPtr += wchar_tByteWidth-1;
 }
	//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file was developed by Steve Naroff and is distributed under
	// the University of Illinois Open Source License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//
	//
	// This file implements the NumericLiteralParser, CharLiteralParser, and
	// StringLiteralParser interfaces.
	//
	//===----------------------------------------------------------------------===//

	#include "clang/Lex/LiteralSupport.h"
	#include "clang/Lex/Preprocessor.h"
	#include "clang/Basic/Diagnostic.h"
	#include "clang/Basic/SourceManager.h"
	#include "clang/Basic/TargetInfo.h"
	#include "llvm/ADT/APInt.h"
	#include "llvm/ADT/StringExtras.h"
	using namespace clang;

	/// HexDigitValue - Translate a single hexadecimal digit character into its
	/// numeric value (0-15).  Both lower and upper case letters are accepted; any
	/// character that is not a hex digit produces -1.
	static int HexDigitValue(char C) {
	  const bool IsDecimalDigit = ('0' <= C) && (C <= '9');
	  const bool IsLowerHex     = ('a' <= C) && (C <= 'f');
	  const bool IsUpperHex     = ('A' <= C) && (C <= 'F');

	  if (IsDecimalDigit)
	    return C - '0';
	  if (IsLowerHex)
	    return 10 + (C - 'a');
	  if (IsUpperHex)
	    return 10 + (C - 'A');

	  // Not a hexadecimal digit.
	  return -1;
	}

	/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
	/// either a character or a string literal.
	///
	/// On entry ThisTokBuf points at the backslash; on return it points just past
	/// the last character of the escape.  ThisTokEnd bounds how far hex and octal
	/// escapes may scan.  HadError is set (never cleared) when a "\x" with no hex
	/// digits is diagnosed.  Loc is the location attached to every diagnostic, and
	/// IsWide selects the wchar_t width instead of the char width when checking
	/// whether a hex or octal value has to be truncated.  The return value is the
	/// value of the escaped character.
	static unsigned ProcessCharEscape(const char *&ThisTokBuf,
	                                  const char *ThisTokEnd, bool &HadError,
	                                  SourceLocation Loc, bool IsWide,
	                                  Preprocessor &PP) {
	  // Skip the '\' char.
	  ++ThisTokBuf;

	  // We know that this character can't be off the end of the buffer, because
	  // that would have been \", which would not have been the end of string.
	  unsigned ResultChar = *ThisTokBuf++;
	  switch (ResultChar) {
	  // These map to themselves.
	  case '\\': case '\'': case '"': case '?': break;

	    // These have fixed mappings.
	  case 'a':
	    // TODO: K&R: the meaning of '\\a' is different in traditional C
	    ResultChar = 7;
	    break;
	  case 'b':
	    ResultChar = 8;
	    break;
	  case 'e':
	    // '\e' (escape) is not standard C; accept it but say so.
	    PP.Diag(Loc, diag::ext_nonstandard_escape, "e");
	    ResultChar = 27;
	    break;
	  case 'f':
	    ResultChar = 12;
	    break;
	  case 'n':
	    ResultChar = 10;
	    break;
	  case 'r':
	    ResultChar = 13;
	    break;
	  case 't':
	    ResultChar = 9;
	    break;
	  case 'v':
	    ResultChar = 11;
	    break;

	    //case 'u': case 'U': // FIXME: UCNs.
	  case 'x': { // Hex escape.
	    ResultChar = 0;
	    if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
	      PP.Diag(Loc, diag::err_hex_escape_no_digits);
	      HadError = true;
	      break;
	    }

	    // Hex escapes are a maximal series of hex digits.
	    bool Overflow = false;
	    for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
	      int CharVal = HexDigitValue(ThisTokBuf[0]);
	      if (CharVal == -1) break;
	      // If the top nibble is already occupied, the shift below loses a digit.
	      Overflow |= (ResultChar & 0xF0000000) ? true : false;
	      ResultChar <<= 4;
	      ResultChar |= CharVal;
	    }

	    // See if any bits will be truncated when evaluated as a character.
	    unsigned CharWidth = IsWide ? PP.getTargetInfo().getWCharWidth(Loc)
	                                : PP.getTargetInfo().getCharWidth(Loc);
	    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
	      Overflow = true;
	      // Keep only the low CharWidth bits.
	      ResultChar &= ~0U >> (32-CharWidth);
	    }

	    // Check for overflow.
	    if (Overflow) // Too many digits to fit in
	      PP.Diag(Loc, diag::warn_hex_escape_too_large);
	    break;
	  }
	  case '0': case '1': case '2': case '3':
	  case '4': case '5': case '6': case '7': {
	    // Octal escapes.  Back up so the loop below sees the first digit too.
	    --ThisTokBuf;
	    ResultChar = 0;

	    // Octal escapes are a series of octal digits with maximum length 3.
	    // "\0123" is a two digit sequence equal to "\012" "3".
	    unsigned NumDigits = 0;
	    do {
	      ResultChar <<= 3;
	      ResultChar |= *ThisTokBuf++ - '0';
	      ++NumDigits;
	    } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
	             ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');

	    // Check for overflow. Reject '\777', but not L'\777'.
	    unsigned CharWidth = IsWide ? PP.getTargetInfo().getWCharWidth(Loc)
	                                : PP.getTargetInfo().getCharWidth(Loc);
	    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
	      PP.Diag(Loc, diag::warn_octal_escape_too_large);
	      ResultChar &= ~0U >> (32-CharWidth);
	    }
	    break;
	  }

	    // Otherwise, these are not valid escapes.
	  case '(': case '{': case '[': case '%':
	    // GCC accepts these as extensions. We warn about them as such though.
	    if (!PP.getLangOptions().NoExtensions) {
	      PP.Diag(Loc, diag::ext_nonstandard_escape,
	              std::string()+(char)ResultChar);
	      break;
	    }
	    // FALL THROUGH.
	  default:
	    // Show the escaped character itself when it is printable, otherwise show
	    // its value in hex.  The decision has to be made on the escaped character
	    // (ResultChar), not on whatever character follows it in the token.  The
	    // range check keeps the argument within what isgraph() accepts.
	    if (ResultChar <= 0xFF && isgraph((int)ResultChar)) {
	      PP.Diag(Loc, diag::ext_unknown_escape, std::string()+(char)ResultChar);
	    } else {
	      PP.Diag(Loc, diag::ext_unknown_escape, "x"+llvm::utohexstr(ResultChar));
	    }
	    break;
	  }

	  return ResultChar;
	}




	/// integer-constant: [C99 6.4.4.1]
	/// decimal-constant integer-suffix
	/// octal-constant integer-suffix
	/// hexadecimal-constant integer-suffix
	/// decimal-constant:
	/// nonzero-digit
	/// decimal-constant digit
	/// octal-constant:
	/// 0
	/// octal-constant octal-digit
	/// hexadecimal-constant:
	/// hexadecimal-prefix hexadecimal-digit
	/// hexadecimal-constant hexadecimal-digit
	/// hexadecimal-prefix: one of
	/// 0x 0X
	/// integer-suffix:
	/// unsigned-suffix [long-suffix]
	/// unsigned-suffix [long-long-suffix]
	/// long-suffix [unsigned-suffix]
	/// long-long-suffix [unsigned-suffix]
	/// nonzero-digit:
	/// 1 2 3 4 5 6 7 8 9
	/// octal-digit:
	/// 0 1 2 3 4 5 6 7
	/// hexadecimal-digit:
	/// 0 1 2 3 4 5 6 7 8 9
	/// a b c d e f
	/// A B C D E F
	/// unsigned-suffix: one of
	/// u U
	/// long-suffix: one of
	/// l L
	/// long-long-suffix: one of
	/// ll LL
	///
	/// floating-constant: [C99 6.4.4.2]
	/// TODO: add rules...
	///

	/// NumericLiteralParser - Classify the numeric literal spelled in
	/// [begin, end): determine its radix, record where the digits and the suffix
	/// start, note whether it has a period and/or an exponent, and decode the
	/// suffix flags (unsigned, long, long long, float, imaginary).  Malformed
	/// literals are diagnosed relative to TokLoc and leave hadError set.
	///
	/// NOTE(review): several checks below read *s after s may have reached
	/// ThisTokEnd, so this presumably relies on the byte at *end being readable
	/// and not a character that could continue the literal - confirm against the
	/// lexer's contract.
	NumericLiteralParser::
	NumericLiteralParser(const char *begin, const char *end,
	                     SourceLocation TokLoc, Preprocessor &pp)
	  : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {
	  s = DigitsBegin = begin;
	  saw_exponent = false;
	  saw_period = false;
	  isLong = false;
	  isUnsigned = false;
	  isLongLong = false;
	  isFloat = false;
	  isImaginary = false;
	  hadError = false;

	  if (*s == '0') { // parse radix
	    s++;
	    if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
	      s++;
	      radix = 16;
	      DigitsBegin = s;
	      s = SkipHexDigits(s);
	      if (s == ThisTokEnd) {
	        // Done.
	      } else if (*s == '.') {
	        s++;
	        saw_period = true;
	        s = SkipHexDigits(s);
	      }
	      // A binary exponent can appear with or without a '.'. If dotted, the
	      // binary exponent is required.
	      if (*s == 'p' || *s == 'P') {
	        s++;
	        saw_exponent = true;
	        if (*s == '+' || *s == '-') s++; // sign
	        const char *first_non_digit = SkipDigits(s);
	        if (first_non_digit == s) {
	          Diag(TokLoc, diag::err_exponent_has_no_digits);
	          return;
	        } else {
	          s = first_non_digit;
	        }
	      } else if (saw_period) {
	        Diag(TokLoc, diag::err_hexconstant_requires_exponent);
	        return;
	      }
	    } else if (*s == 'b' || *s == 'B') {
	      // 0b101010 is a GCC extension.
	      ++s;
	      radix = 2;
	      DigitsBegin = s;
	      s = SkipBinaryDigits(s);
	      if (s == ThisTokEnd) {
	        // Done.
	      } else if (isxdigit(*s)) {
	        // Point the diagnostic at the offending digit, as the octal case does.
	        Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
	             diag::err_invalid_binary_digit, std::string(s, s+1));
	        return;
	      }
	      PP.Diag(TokLoc, diag::ext_binary_literal);
	    } else {
	      // For now, the radix is set to 8. If we discover that we have a
	      // floating point constant, the radix will change to 10. Octal floating
	      // point constants are not permitted (only decimal and hexadecimal).
	      radix = 8;
	      DigitsBegin = s;
	      s = SkipOctalDigits(s);
	      if (s == ThisTokEnd) {
	        // Done.
	      } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
	        // 'e'/'E' is a hex digit as far as isxdigit is concerned, but here it
	        // introduces an exponent (e.g. "0e5"), so it must not be rejected.
	        TokLoc = PP.AdvanceToTokenCharacter(TokLoc, s-begin);
	        Diag(TokLoc, diag::err_invalid_octal_digit, std::string(s, s+1));
	        return;
	      } else if (*s == '.') {
	        s++;
	        radix = 10;
	        saw_period = true;
	        s = SkipDigits(s);
	      }
	      if (*s == 'e' || *s == 'E') { // exponent
	        s++;
	        radix = 10;
	        saw_exponent = true;
	        if (*s == '+' || *s == '-') s++; // sign
	        const char *first_non_digit = SkipDigits(s);
	        if (first_non_digit == s) {
	          Diag(TokLoc, diag::err_exponent_has_no_digits);
	          return;
	        } else {
	          s = first_non_digit;
	        }
	      }
	    }
	  } else { // the first digit is non-zero
	    radix = 10;
	    s = SkipDigits(s);
	    if (s == ThisTokEnd) {
	      // Done.
	    } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
	      // A hex letter other than the exponent marker cannot appear in a
	      // decimal constant.  'e'/'E' is left for the exponent code below so
	      // that literals such as "1e5" are accepted.
	      Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
	           diag::err_invalid_decimal_digit, std::string(s, s+1));
	      return;
	    } else if (*s == '.') {
	      s++;
	      saw_period = true;
	      s = SkipDigits(s);
	    }
	    if (*s == 'e' || *s == 'E') { // exponent
	      s++;
	      saw_exponent = true;
	      if (*s == '+' || *s == '-') s++; // sign
	      const char *first_non_digit = SkipDigits(s);
	      if (first_non_digit == s) {
	        Diag(TokLoc, diag::err_exponent_has_no_digits);
	        return;
	      } else {
	        s = first_non_digit;
	      }
	    }
	  }

	  SuffixBegin = s;

	  // Parse the suffix. At this point we can classify whether we have an FP or
	  // integer constant.
	  bool isFPConstant = isFloatingLiteral();

	  // Loop over all of the characters of the suffix. If we see something bad,
	  // we break out of the loop.  Inside the switch, 'continue' means the
	  // character was accepted and 'break' means it was rejected.
	  for (; s != ThisTokEnd; ++s) {
	    switch (*s) {
	    case 'f': // FP Suffix for "float"
	    case 'F':
	      if (!isFPConstant) break; // Error for integer constant.
	      if (isFloat || isLong) break; // FF, LF invalid.
	      isFloat = true;
	      continue; // Success.
	    case 'u':
	    case 'U':
	      if (isFPConstant) break; // Error for floating constant.
	      if (isUnsigned) break; // Cannot be repeated.
	      isUnsigned = true;
	      continue; // Success.
	    case 'l':
	    case 'L':
	      if (isLong || isLongLong) break; // Cannot be repeated.
	      if (isFloat) break; // LF invalid.

	      // Check for long long. The L's need to be adjacent and the same case.
	      if (s+1 != ThisTokEnd && s[1] == s[0]) {
	        if (isFPConstant) break; // long long invalid for floats.
	        isLongLong = true;
	        ++s; // Eat both of them.
	      } else {
	        isLong = true;
	      }
	      continue; // Success.
	    case 'i':
	    case 'I':
	    case 'j':
	    case 'J':
	      if (isImaginary) break; // Cannot be repeated.
	      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
	              diag::ext_imaginary_constant);
	      isImaginary = true;
	      continue; // Success.
	    }
	    // If we reached here, there was an error.
	    break;
	  }

	  // Report an error if there are any.  s stopped on the first suffix
	  // character that was not accepted.
	  if (s != ThisTokEnd) {
	    TokLoc = PP.AdvanceToTokenCharacter(TokLoc, s-begin);
	    Diag(TokLoc, isFPConstant ? diag::err_invalid_suffix_float_constant :
	                                diag::err_invalid_suffix_integer_constant,
	         std::string(SuffixBegin, ThisTokEnd));
	    return;
	  }
	}

	/// GetIntegerValue - Convert this numeric literal value to an APInt that
	/// matches Val's input width.  If there is an overflow, set Val to the low bits
	/// of the result and return true.  Otherwise, return false.
	///
	/// The digits in [DigitsBegin, SuffixBegin) are accumulated one at a time in
	/// the literal's radix; 's' is used as the scan cursor and is left at
	/// SuffixBegin on return.
	bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
	  Val = 0;
	  s = DigitsBegin;

	  // Keep the radix and the current digit as APInts constructed with Val's bit
	  // width, so they can be combined directly with Val below.
	  llvm::APInt RadixVal(Val.getBitWidth(), radix);
	  llvm::APInt CharVal(Val.getBitWidth(), 0);
	  llvm::APInt OldVal = Val;

	  bool OverflowOccurred = false;
	  while (s < SuffixBegin) {
	    // HexDigitValue returns -1 for a non-digit; stored into an unsigned that
	    // becomes a very large value, so the assert below rejects it too.
	    unsigned C = HexDigitValue(*s++);

	    // If this letter is out of bound for this radix, reject it.
	    assert(C < radix && "NumericLiteralParser ctor should have rejected this");

	    CharVal = C;

	    // Add the digit to the value in the appropriate radix.  If adding in digits
	    // made the value smaller, then this overflowed.
	    OldVal = Val;

	    // Multiply by radix, did overflow occur on the multiply?  Dividing the
	    // product back by the radix must give the previous value if nothing was
	    // lost.
	    Val *= RadixVal;
	    OverflowOccurred |= Val.udiv(RadixVal) != OldVal;

	    OldVal = Val;
	    // Add value, did overflow occur on the value?  An unsigned sum that is
	    // smaller than either operand has wrapped.
	    Val += CharVal;
	    OverflowOccurred |= Val.ult(OldVal);
	    OverflowOccurred |= Val.ult(CharVal);
	  }
	  return OverflowOccurred;
	}

	// GetFloatValue - Poor man's floatvalue (FIXME).
	//
	// Hands the token text [ThisTokBegin, ThisTokEnd) to strtod and narrows the
	// result to float.
	float NumericLiteralParser::GetFloatValue() {
	  // strtod needs a NUL-terminated string, so copy the token text.  Use a
	  // std::string rather than a fixed 256-byte stack array: a numeric token of
	  // 256 or more characters would otherwise write past the end of the array.
	  const std::string floatChars(ThisTokBegin, ThisTokEnd);
	  return (float)strtod(floatChars.c_str(), 0);
	}

	/// Diag - Set hadError on this parser and forward the diagnostic DiagID, with
	/// string argument M, to the preprocessor at location Loc.
	void NumericLiteralParser::Diag(SourceLocation Loc, unsigned DiagID,
	                                const std::string &M) {
	  // Every diagnostic issued through this helper marks the literal as bad.
	  hadError = true;
	  PP.Diag(Loc, DiagID, M);
	}


	/// CharLiteralParser - Compute the value of a character constant.  The token
	/// text starts at 'begin' and has the form L?'...': an optional wide prefix
	/// followed by a quoted sequence of characters and escapes.  'end' is passed
	/// to ProcessCharEscape as the end of the token, Loc is the location used for
	/// diagnostics and target queries, and PP provides target information and
	/// diagnostic reporting.  The results are stored in Value, IsWide and
	/// HadError.
	CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
	                                     SourceLocation Loc, Preprocessor &PP) {
	  // At this point we know that the character matches the regex "L?'.*'".
	  HadError = false;
	  Value = 0;

	  // Determine if this is a wide character.
	  IsWide = begin[0] == 'L';
	  if (IsWide) ++begin;

	  // Skip over the entry quote.
	  assert(begin[0] == '\'' && "Invalid token lexed");
	  ++begin;

	  // FIXME: This assumes that 'int' is 32-bits in overflow calculation, and the
	  // size of "value".
	  assert(PP.getTargetInfo().getIntWidth(Loc) == 32 &&
	         "Assumes sizeof(int) == 4 for now");
	  // FIXME: This assumes that wchar_t is 32-bits for now.
	  assert(PP.getTargetInfo().getWCharWidth(Loc) == 32 &&
	         "Assumes sizeof(wchar_t) == 4 for now");
	  // FIXME: This extensively assumes that 'char' is 8-bits.
	  assert(PP.getTargetInfo().getCharWidth(Loc) == 8 &&
	         "Assumes char is 8 bits");

	  bool isFirstChar = true;
	  bool isMultiChar = false;
	  while (begin[0] != '\'') {
	    unsigned ResultChar;
	    if (begin[0] != '\\')     // If this is a normal character, consume it.
	      ResultChar = *begin++;
	    else                      // Otherwise, this is an escape character.
	      ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP);

	    // If this is a multi-character constant (e.g. 'abc'), handle it.  These are
	    // implementation defined (C99 6.4.4.4p10).
	    if (!isFirstChar) {
	      // If this is the second character being processed, do special handling.
	      if (!isMultiChar) {
	        isMultiChar = true;

	        // Warn about discarding the top bits for multi-char wide-character
	        // constants (L'abcd').
	        if (IsWide)
	          PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);
	      }

	      if (IsWide) {
	        // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
	        Value = 0;
	      } else {
	        // Narrow character literals act as though their value is concatenated
	        // in this implementation.  Warn if shifting in another byte would
	        // push bits off the top of Value.
	        if (((Value << 8) >> 8) != Value)
	          PP.Diag(Loc, diag::warn_char_constant_too_large);
	        Value <<= 8;
	      }
	    }

	    Value += ResultChar;
	    isFirstChar = false;
	  }

	  // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
	  // if 'char' is signed for this target (C99 6.4.4.4p10).  Note that multiple
	  // character constants are not sign extended in this implementation:
	  // '\xFF\xFF' = 65535 and '\x0\xFF' = 255, which matches GCC.
	  if (!IsWide && !isMultiChar && (Value & 128) &&
	      PP.getTargetInfo().isCharSigned(Loc))
	    Value = (signed char)Value;
	}


	/// string-literal: [C99 6.4.5]
	///          " [s-char-sequence] "
	///         L" [s-char-sequence] "
	/// s-char-sequence:
	///         s-char
	///         s-char-sequence s-char
	/// s-char:
	///         any source character except the double quote ",
	///           backslash \, or newline character
	///         escape-character
	///         universal-character-name
	/// escape-character: [C99 6.4.4.4]
	///         \ escape-code
	///         universal-character-name
	/// escape-code:
	///         character-escape-code
	///         octal-escape-code
	///         hex-escape-code
	/// character-escape-code: one of
	///         n t b r f v a
	///         \ ' " ?
	/// octal-escape-code:
	///         octal-digit
	///         octal-digit octal-digit
	///         octal-digit octal-digit octal-digit
	/// hex-escape-code:
	///         x hex-digit
	///         hex-escape-code hex-digit
	/// universal-character-name:
	///         \u hex-quad
	///         \U hex-quad hex-quad
	/// hex-quad:
	///         hex-digit hex-digit hex-digit hex-digit
	///
	/// The constructor concatenates the NumStringToks string-literal tokens in
	/// StringToks into ResultBuf, processing escapes as it goes.  If any token is
	/// a wide string literal, every character of the result is widened to
	/// wchar_tByteWidth bytes.  hadError is set if escape processing reports an
	/// error.
	StringLiteralParser::
	StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
	                    Preprocessor &pp, TargetInfo &t)
	  : PP(pp), Target(t) {
	  // Scan all of the string portions, remember the max individual token length,
	  // computing a bound on the concatenated string length, and see whether any
	  // piece is a wide-string.  If any of the string portions is a wide-string
	  // literal, the result is a wide-string literal [C99 6.4.5p4].
	  MaxTokenLength = StringToks[0].getLength();
	  SizeBound = StringToks[0].getLength()-2;  // -2 for "".
	  AnyWide = StringToks[0].getKind() == tok::wide_string_literal;

	  hadError = false;

	  // Implement Translation Phase #6: concatenation of string literals
	  // (C99 5.1.1.2p1).  The common case is only one string fragment.
	  for (unsigned i = 1; i != NumStringToks; ++i) {
	    // The string could be shorter than this if it needs cleaning, but this is a
	    // reasonable bound, which is all we need.
	    SizeBound += StringToks[i].getLength()-2;  // -2 for "".

	    // Remember maximum string piece length.
	    if (StringToks[i].getLength() > MaxTokenLength)
	      MaxTokenLength = StringToks[i].getLength();

	    // Remember if we see any wide strings.
	    AnyWide |= StringToks[i].getKind() == tok::wide_string_literal;
	  }

	  // Include space for the null terminator.
	  ++SizeBound;

	  // TODO: K&R warning: "traditional C rejects string constant concatenation"

	  // Get the width in bytes of wchar_t.  If no wchar_t strings are used, do not
	  // query the target.  As such, wchar_tByteWidth is only valid if AnyWide=true.
	  wchar_tByteWidth = ~0U;
	  if (AnyWide) {
	    wchar_tByteWidth = Target.getWCharWidth(StringToks[0].getLocation());
	    assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
	    wchar_tByteWidth /= 8;
	  }

	  // The output buffer size needs to be large enough to hold wide characters.
	  // This is a worst-case assumption which basically corresponds to L"" "long".
	  if (AnyWide)
	    SizeBound *= wchar_tByteWidth;

	  // Size the temporary buffer to hold the result string data.
	  ResultBuf.resize(SizeBound);

	  // Likewise, but for each string piece.
	  llvm::SmallString<512> TokenBuf;
	  TokenBuf.resize(MaxTokenLength);

	  // Loop over all the strings, getting their spelling, and expanding them to
	  // wide strings as appropriate.
	  ResultPtr = &ResultBuf[0];   // Next byte to fill in.

	  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
	    const char *ThisTokBuf = &TokenBuf[0];
	    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
	    // that ThisTokBuf points to a buffer that is big enough for the whole token
	    // and 'spelled' tokens can only shrink.
	    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
	    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.

	    // TODO: Input character set mapping support.

	    // Skip L marker for wide strings.
	    bool ThisIsWide = false;
	    if (ThisTokBuf[0] == 'L') {
	      ++ThisTokBuf;
	      ThisIsWide = true;
	    }

	    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
	    ++ThisTokBuf;

	    while (ThisTokBuf != ThisTokEnd) {
	      // Is this a span of non-escape characters?
	      if (ThisTokBuf[0] != '\\') {
	        const char *InStart = ThisTokBuf;
	        do {
	          ++ThisTokBuf;
	        } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');

	        // Copy the character span over.
	        unsigned Len = ThisTokBuf-InStart;
	        if (!AnyWide) {
	          memcpy(ResultPtr, InStart, Len);
	          ResultPtr += Len;
	        } else {
	          // Note: our internal rep of wide char tokens is always little-endian.
	          for (; Len; --Len, ++InStart) {
	            *ResultPtr++ = InStart[0];
	            // Add zeros at the end.
	            for (unsigned Byte = 1; Byte != wchar_tByteWidth; ++Byte)
	              *ResultPtr++ = 0;
	          }
	        }
	        continue;
	      }

	      // Otherwise, this is an escape character.  Process it.
	      unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
	                                              StringToks[i].getLocation(),
	                                              ThisIsWide, PP);

	      // Note: our internal rep of wide char tokens is always little-endian.
	      *ResultPtr++ = ResultChar & 0xFF;

	      // For wide results, emit the remaining bytes of the character from the
	      // next-lowest byte upwards.
	      if (AnyWide) {
	        for (unsigned Byte = 1; Byte != wchar_tByteWidth; ++Byte)
	          *ResultPtr++ = ResultChar >> Byte*8;
	      }
	    }
	  }

	  // Add zero terminator.
	  // NOTE(review): for wide strings this stores only wchar_tByteWidth-1 zero
	  // bytes, starting at the terminator's first byte, and leaves ResultPtr on
	  // the terminator's last byte.  That last byte is never written here, so this
	  // presumably relies on ResultBuf.resize() having zero-filled the buffer.
	  // Confirm before changing; code outside this function may depend on where
	  // ResultPtr ends up.
	  *ResultPtr = 0;
	  if (AnyWide) {
	    for (unsigned Byte = 1; Byte != wchar_tByteWidth; ++Byte)
	      *ResultPtr++ = 0;
	  }
	}