| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1 | //===--- LiteralSupport.cpp - Code to parse and process literals ----------===// | 
|  | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
| Chris Lattner | 0bc735f | 2007-12-29 19:59:25 +0000 | [diff] [blame] | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
|  | 9 | // | 
|  | 10 | // This file implements the NumericLiteralParser, CharLiteralParser, and | 
|  | 11 | // StringLiteralParser interfaces. | 
|  | 12 | // | 
|  | 13 | //===----------------------------------------------------------------------===// | 
|  | 14 |  | 
|  | 15 | #include "clang/Lex/LiteralSupport.h" | 
|  | 16 | #include "clang/Lex/Preprocessor.h" | 
| Chris Lattner | 500d329 | 2009-01-29 05:15:15 +0000 | [diff] [blame] | 17 | #include "clang/Lex/LexDiagnostic.h" | 
| Chris Lattner | 136f93a | 2007-07-16 06:55:01 +0000 | [diff] [blame] | 18 | #include "clang/Basic/TargetInfo.h" | 
| Eli Friedman | f74a458 | 2011-11-01 02:14:50 +0000 | [diff] [blame] | 19 | #include "clang/Basic/ConvertUTF.h" | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 20 | #include "llvm/ADT/StringExtras.h" | 
| David Blaikie | 9fe8c74 | 2011-09-23 05:35:21 +0000 | [diff] [blame] | 21 | #include "llvm/Support/ErrorHandling.h" | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 22 | using namespace clang; | 
|  | 23 |  | 
/// HexDigitValue - Map a hexadecimal digit character ('0'-'9', 'a'-'f' or
/// 'A'-'F') to its numeric value in the range 0..15.  Any other character
/// yields -1.
static int HexDigitValue(char C) {
  return (C >= '0' && C <= '9') ? C - '0'
       : (C >= 'a' && C <= 'f') ? C - 'a' + 10
       : (C >= 'A' && C <= 'F') ? C - 'A' + 10
       : -1;
}
|  | 32 |  | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 33 | static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) { | 
|  | 34 | switch (kind) { | 
| David Blaikie | b219cfc | 2011-09-23 05:06:16 +0000 | [diff] [blame] | 35 | default: llvm_unreachable("Unknown token type!"); | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 36 | case tok::char_constant: | 
|  | 37 | case tok::string_literal: | 
|  | 38 | case tok::utf8_string_literal: | 
|  | 39 | return Target.getCharWidth(); | 
|  | 40 | case tok::wide_char_constant: | 
|  | 41 | case tok::wide_string_literal: | 
|  | 42 | return Target.getWCharWidth(); | 
|  | 43 | case tok::utf16_char_constant: | 
|  | 44 | case tok::utf16_string_literal: | 
|  | 45 | return Target.getChar16Width(); | 
|  | 46 | case tok::utf32_char_constant: | 
|  | 47 | case tok::utf32_string_literal: | 
|  | 48 | return Target.getChar32Width(); | 
|  | 49 | } | 
|  | 50 | } | 
|  | 51 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 52 | /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in | 
|  | 53 | /// either a character or a string literal. | 
|  | 54 | static unsigned ProcessCharEscape(const char *&ThisTokBuf, | 
|  | 55 | const char *ThisTokEnd, bool &HadError, | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 56 | FullSourceLoc Loc, unsigned CharWidth, | 
| David Blaikie | d6471f7 | 2011-09-25 23:23:43 +0000 | [diff] [blame] | 57 | DiagnosticsEngine *Diags) { | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 58 | // Skip the '\' char. | 
|  | 59 | ++ThisTokBuf; | 
|  | 60 |  | 
|  | 61 | // We know that this character can't be off the end of the buffer, because | 
|  | 62 | // that would have been \", which would not have been the end of string. | 
|  | 63 | unsigned ResultChar = *ThisTokBuf++; | 
|  | 64 | switch (ResultChar) { | 
|  | 65 | // These map to themselves. | 
|  | 66 | case '\\': case '\'': case '"': case '?': break; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 67 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 68 | // These have fixed mappings. | 
|  | 69 | case 'a': | 
|  | 70 | // TODO: K&R: the meaning of '\\a' is different in traditional C | 
|  | 71 | ResultChar = 7; | 
|  | 72 | break; | 
|  | 73 | case 'b': | 
|  | 74 | ResultChar = 8; | 
|  | 75 | break; | 
|  | 76 | case 'e': | 
| Chris Lattner | 91f54ce | 2010-11-17 06:26:08 +0000 | [diff] [blame] | 77 | if (Diags) | 
|  | 78 | Diags->Report(Loc, diag::ext_nonstandard_escape) << "e"; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 79 | ResultChar = 27; | 
|  | 80 | break; | 
| Eli Friedman | 3c54801 | 2009-06-10 01:32:39 +0000 | [diff] [blame] | 81 | case 'E': | 
| Chris Lattner | 91f54ce | 2010-11-17 06:26:08 +0000 | [diff] [blame] | 82 | if (Diags) | 
|  | 83 | Diags->Report(Loc, diag::ext_nonstandard_escape) << "E"; | 
| Eli Friedman | 3c54801 | 2009-06-10 01:32:39 +0000 | [diff] [blame] | 84 | ResultChar = 27; | 
|  | 85 | break; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 86 | case 'f': | 
|  | 87 | ResultChar = 12; | 
|  | 88 | break; | 
|  | 89 | case 'n': | 
|  | 90 | ResultChar = 10; | 
|  | 91 | break; | 
|  | 92 | case 'r': | 
|  | 93 | ResultChar = 13; | 
|  | 94 | break; | 
|  | 95 | case 't': | 
|  | 96 | ResultChar = 9; | 
|  | 97 | break; | 
|  | 98 | case 'v': | 
|  | 99 | ResultChar = 11; | 
|  | 100 | break; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 101 | case 'x': { // Hex escape. | 
|  | 102 | ResultChar = 0; | 
|  | 103 | if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) { | 
| Chris Lattner | 91f54ce | 2010-11-17 06:26:08 +0000 | [diff] [blame] | 104 | if (Diags) | 
|  | 105 | Diags->Report(Loc, diag::err_hex_escape_no_digits); | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 106 | HadError = 1; | 
|  | 107 | break; | 
|  | 108 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 109 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 110 | // Hex escapes are a maximal series of hex digits. | 
|  | 111 | bool Overflow = false; | 
|  | 112 | for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) { | 
|  | 113 | int CharVal = HexDigitValue(ThisTokBuf[0]); | 
|  | 114 | if (CharVal == -1) break; | 
| Chris Lattner | c29bbde | 2008-09-30 20:45:40 +0000 | [diff] [blame] | 115 | // About to shift out a digit? | 
|  | 116 | Overflow |= (ResultChar & 0xF0000000) ? true : false; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 117 | ResultChar <<= 4; | 
|  | 118 | ResultChar |= CharVal; | 
|  | 119 | } | 
|  | 120 |  | 
|  | 121 | // See if any bits will be truncated when evaluated as a character. | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 122 | if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) { | 
|  | 123 | Overflow = true; | 
|  | 124 | ResultChar &= ~0U >> (32-CharWidth); | 
|  | 125 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 126 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 127 | // Check for overflow. | 
| Chris Lattner | 91f54ce | 2010-11-17 06:26:08 +0000 | [diff] [blame] | 128 | if (Overflow && Diags)   // Too many digits to fit in | 
|  | 129 | Diags->Report(Loc, diag::warn_hex_escape_too_large); | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 130 | break; | 
|  | 131 | } | 
|  | 132 | case '0': case '1': case '2': case '3': | 
|  | 133 | case '4': case '5': case '6': case '7': { | 
|  | 134 | // Octal escapes. | 
|  | 135 | --ThisTokBuf; | 
|  | 136 | ResultChar = 0; | 
|  | 137 |  | 
|  | 138 | // Octal escapes are a series of octal digits with maximum length 3. | 
|  | 139 | // "\0123" is a two digit sequence equal to "\012" "3". | 
|  | 140 | unsigned NumDigits = 0; | 
|  | 141 | do { | 
|  | 142 | ResultChar <<= 3; | 
|  | 143 | ResultChar |= *ThisTokBuf++ - '0'; | 
|  | 144 | ++NumDigits; | 
|  | 145 | } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 && | 
|  | 146 | ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7'); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 147 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 148 | // Check for overflow.  Reject '\777', but not L'\777'. | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 149 | if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) { | 
| Chris Lattner | 91f54ce | 2010-11-17 06:26:08 +0000 | [diff] [blame] | 150 | if (Diags) | 
|  | 151 | Diags->Report(Loc, diag::warn_octal_escape_too_large); | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 152 | ResultChar &= ~0U >> (32-CharWidth); | 
|  | 153 | } | 
|  | 154 | break; | 
|  | 155 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 156 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 157 | // Otherwise, these are not valid escapes. | 
|  | 158 | case '(': case '{': case '[': case '%': | 
|  | 159 | // GCC accepts these as extensions.  We warn about them as such though. | 
| Chris Lattner | 91f54ce | 2010-11-17 06:26:08 +0000 | [diff] [blame] | 160 | if (Diags) | 
|  | 161 | Diags->Report(Loc, diag::ext_nonstandard_escape) | 
| Douglas Gregor | b90f4b3 | 2010-05-26 05:35:51 +0000 | [diff] [blame] | 162 | << std::string()+(char)ResultChar; | 
| Eli Friedman | f01fdff | 2009-04-28 00:51:18 +0000 | [diff] [blame] | 163 | break; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 164 | default: | 
| Chris Lattner | 91f54ce | 2010-11-17 06:26:08 +0000 | [diff] [blame] | 165 | if (Diags == 0) | 
| Douglas Gregor | b90f4b3 | 2010-05-26 05:35:51 +0000 | [diff] [blame] | 166 | break; | 
|  | 167 |  | 
| Ted Kremenek | 23ef69d | 2010-12-03 00:09:56 +0000 | [diff] [blame] | 168 | if (isgraph(ResultChar)) | 
| Chris Lattner | 91f54ce | 2010-11-17 06:26:08 +0000 | [diff] [blame] | 169 | Diags->Report(Loc, diag::ext_unknown_escape) | 
|  | 170 | << std::string()+(char)ResultChar; | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 171 | else | 
| Chris Lattner | 91f54ce | 2010-11-17 06:26:08 +0000 | [diff] [blame] | 172 | Diags->Report(Loc, diag::ext_unknown_escape) | 
|  | 173 | << "x"+llvm::utohexstr(ResultChar); | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 174 | break; | 
|  | 175 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 176 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 177 | return ResultChar; | 
|  | 178 | } | 
|  | 179 |  | 
| Steve Naroff | 0e3e3eb | 2009-03-30 23:46:03 +0000 | [diff] [blame] | 180 | /// ProcessUCNEscape - Read the Universal Character Name, check constraints and | 
| Nico Weber | 59705ae | 2010-10-09 00:27:47 +0000 | [diff] [blame] | 181 | /// return the UTF32. | 
|  | 182 | static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, | 
|  | 183 | uint32_t &UcnVal, unsigned short &UcnLen, | 
| David Blaikie | d6471f7 | 2011-09-25 23:23:43 +0000 | [diff] [blame] | 184 | FullSourceLoc Loc, DiagnosticsEngine *Diags, | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 185 | const LangOptions &Features, | 
|  | 186 | bool in_char_string_literal = false) { | 
| Chris Lattner | 6c66f07 | 2010-11-17 06:46:14 +0000 | [diff] [blame] | 187 | if (!Features.CPlusPlus && !Features.C99 && Diags) | 
| Chris Lattner | 872a45e | 2010-11-17 06:55:10 +0000 | [diff] [blame] | 188 | Diags->Report(Loc, diag::warn_ucn_not_valid_in_c89); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 189 |  | 
| Steve Naroff | 4e93b34 | 2009-04-01 11:09:15 +0000 | [diff] [blame] | 190 | // Save the beginning of the string (for error diagnostics). | 
|  | 191 | const char *ThisTokBegin = ThisTokBuf; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 192 |  | 
| Steve Naroff | 0e3e3eb | 2009-03-30 23:46:03 +0000 | [diff] [blame] | 193 | // Skip the '\u' char's. | 
|  | 194 | ThisTokBuf += 2; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 195 |  | 
| Steve Naroff | 0e3e3eb | 2009-03-30 23:46:03 +0000 | [diff] [blame] | 196 | if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) { | 
| Chris Lattner | 6c66f07 | 2010-11-17 06:46:14 +0000 | [diff] [blame] | 197 | if (Diags) | 
| Chris Lattner | 872a45e | 2010-11-17 06:55:10 +0000 | [diff] [blame] | 198 | Diags->Report(Loc, diag::err_ucn_escape_no_digits); | 
| Nico Weber | 59705ae | 2010-10-09 00:27:47 +0000 | [diff] [blame] | 199 | return false; | 
| Steve Naroff | 0e3e3eb | 2009-03-30 23:46:03 +0000 | [diff] [blame] | 200 | } | 
| Nico Weber | 59705ae | 2010-10-09 00:27:47 +0000 | [diff] [blame] | 201 | UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8); | 
| Fariborz Jahanian | 56bedef | 2010-08-31 23:34:27 +0000 | [diff] [blame] | 202 | unsigned short UcnLenSave = UcnLen; | 
| Nico Weber | 59705ae | 2010-10-09 00:27:47 +0000 | [diff] [blame] | 203 | for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) { | 
| Steve Naroff | 0e3e3eb | 2009-03-30 23:46:03 +0000 | [diff] [blame] | 204 | int CharVal = HexDigitValue(ThisTokBuf[0]); | 
|  | 205 | if (CharVal == -1) break; | 
|  | 206 | UcnVal <<= 4; | 
|  | 207 | UcnVal |= CharVal; | 
|  | 208 | } | 
|  | 209 | // If we didn't consume the proper number of digits, there is a problem. | 
| Nico Weber | 59705ae | 2010-10-09 00:27:47 +0000 | [diff] [blame] | 210 | if (UcnLenSave) { | 
| Chris Lattner | 872a45e | 2010-11-17 06:55:10 +0000 | [diff] [blame] | 211 | if (Diags) { | 
| Chris Lattner | 7ef5c27 | 2010-11-17 07:05:50 +0000 | [diff] [blame] | 212 | SourceLocation L = | 
|  | 213 | Lexer::AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin, | 
|  | 214 | Loc.getManager(), Features); | 
|  | 215 | Diags->Report(FullSourceLoc(L, Loc.getManager()), | 
|  | 216 | diag::err_ucn_escape_incomplete); | 
| Chris Lattner | 872a45e | 2010-11-17 06:55:10 +0000 | [diff] [blame] | 217 | } | 
| Nico Weber | 59705ae | 2010-10-09 00:27:47 +0000 | [diff] [blame] | 218 | return false; | 
| Steve Naroff | 0e3e3eb | 2009-03-30 23:46:03 +0000 | [diff] [blame] | 219 | } | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 220 | // Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2] | 
|  | 221 | bool invalid_ucn = (0xD800<=UcnVal && UcnVal<=0xDFFF) // surrogate codepoints | 
|  | 222 | || 0x10FFFF < UcnVal; // maximum legal UTF32 value | 
|  | 223 |  | 
|  | 224 | // C++11 allows UCNs that refer to control characters and basic source | 
|  | 225 | // characters inside character and string literals | 
|  | 226 | if (!Features.CPlusPlus0x || !in_char_string_literal) { | 
|  | 227 | if ((UcnVal < 0xa0 && | 
|  | 228 | (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60 ))) {  // $, @, ` | 
|  | 229 | invalid_ucn = true; | 
|  | 230 | } | 
|  | 231 | } | 
|  | 232 |  | 
|  | 233 | if (invalid_ucn) { | 
| Chris Lattner | 6c66f07 | 2010-11-17 06:46:14 +0000 | [diff] [blame] | 234 | if (Diags) | 
| Chris Lattner | 872a45e | 2010-11-17 06:55:10 +0000 | [diff] [blame] | 235 | Diags->Report(Loc, diag::err_ucn_escape_invalid); | 
| Nico Weber | 59705ae | 2010-10-09 00:27:47 +0000 | [diff] [blame] | 236 | return false; | 
|  | 237 | } | 
|  | 238 | return true; | 
|  | 239 | } | 
|  | 240 |  | 
|  | 241 | /// EncodeUCNEscape - Read the Universal Character Name, check constraints and | 
|  | 242 | /// convert the UTF32 to UTF8 or UTF16. This is a subroutine of | 
|  | 243 | /// StringLiteralParser. When we decide to implement UCN's for identifiers, | 
|  | 244 | /// we will likely rework our support for UCN's. | 
|  | 245 | static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, | 
| Chris Lattner | a95880d | 2010-11-17 07:12:42 +0000 | [diff] [blame] | 246 | char *&ResultBuf, bool &HadError, | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 247 | FullSourceLoc Loc, unsigned CharByteWidth, | 
| David Blaikie | d6471f7 | 2011-09-25 23:23:43 +0000 | [diff] [blame] | 248 | DiagnosticsEngine *Diags, | 
|  | 249 | const LangOptions &Features) { | 
| Nico Weber | 59705ae | 2010-10-09 00:27:47 +0000 | [diff] [blame] | 250 | typedef uint32_t UTF32; | 
|  | 251 | UTF32 UcnVal = 0; | 
|  | 252 | unsigned short UcnLen = 0; | 
| Chris Lattner | a95880d | 2010-11-17 07:12:42 +0000 | [diff] [blame] | 253 | if (!ProcessUCNEscape(ThisTokBuf, ThisTokEnd, UcnVal, UcnLen, Loc, Diags, | 
|  | 254 | Features)) { | 
| Steve Naroff | 0e3e3eb | 2009-03-30 23:46:03 +0000 | [diff] [blame] | 255 | HadError = 1; | 
|  | 256 | return; | 
|  | 257 | } | 
| Nico Weber | 59705ae | 2010-10-09 00:27:47 +0000 | [diff] [blame] | 258 |  | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 259 | assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth) && | 
|  | 260 | "only character widths of 1, 2, or 4 bytes supported"); | 
| Nico Weber | a0f15b0 | 2010-10-06 04:57:26 +0000 | [diff] [blame] | 261 |  | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 262 | (void)UcnLen; | 
|  | 263 | assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported"); | 
| Nico Weber | a0f15b0 | 2010-10-06 04:57:26 +0000 | [diff] [blame] | 264 |  | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 265 | if (CharByteWidth == 4) { | 
| Eli Friedman | caf1f26 | 2011-11-02 23:06:23 +0000 | [diff] [blame] | 266 | // FIXME: Make the type of the result buffer correct instead of | 
|  | 267 | // using reinterpret_cast. | 
|  | 268 | UTF32 *ResultPtr = reinterpret_cast<UTF32*>(ResultBuf); | 
|  | 269 | *ResultPtr = UcnVal; | 
|  | 270 | ResultBuf += 4; | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 271 | return; | 
|  | 272 | } | 
|  | 273 |  | 
|  | 274 | if (CharByteWidth == 2) { | 
| Eli Friedman | caf1f26 | 2011-11-02 23:06:23 +0000 | [diff] [blame] | 275 | // FIXME: Make the type of the result buffer correct instead of | 
|  | 276 | // using reinterpret_cast. | 
|  | 277 | UTF16 *ResultPtr = reinterpret_cast<UTF16*>(ResultBuf); | 
|  | 278 |  | 
| Nico Weber | a0f15b0 | 2010-10-06 04:57:26 +0000 | [diff] [blame] | 279 | if (UcnVal < (UTF32)0xFFFF) { | 
| Eli Friedman | caf1f26 | 2011-11-02 23:06:23 +0000 | [diff] [blame] | 280 | *ResultPtr = UcnVal; | 
|  | 281 | ResultBuf += 2; | 
| Nico Weber | a0f15b0 | 2010-10-06 04:57:26 +0000 | [diff] [blame] | 282 | return; | 
|  | 283 | } | 
| Nico Weber | a0f15b0 | 2010-10-06 04:57:26 +0000 | [diff] [blame] | 284 |  | 
| Eli Friedman | caf1f26 | 2011-11-02 23:06:23 +0000 | [diff] [blame] | 285 | // Convert to UTF16. | 
| Nico Weber | a0f15b0 | 2010-10-06 04:57:26 +0000 | [diff] [blame] | 286 | UcnVal -= 0x10000; | 
| Eli Friedman | caf1f26 | 2011-11-02 23:06:23 +0000 | [diff] [blame] | 287 | *ResultPtr     = 0xD800 + (UcnVal >> 10); | 
|  | 288 | *(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF); | 
|  | 289 | ResultBuf += 4; | 
| Fariborz Jahanian | 56bedef | 2010-08-31 23:34:27 +0000 | [diff] [blame] | 290 | return; | 
|  | 291 | } | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 292 |  | 
|  | 293 | assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters"); | 
|  | 294 |  | 
| Steve Naroff | 0e3e3eb | 2009-03-30 23:46:03 +0000 | [diff] [blame] | 295 | // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8. | 
|  | 296 | // The conversion below was inspired by: | 
|  | 297 | //   http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 298 | // First, we determine how many bytes the result will require. | 
| Steve Naroff | 4e93b34 | 2009-04-01 11:09:15 +0000 | [diff] [blame] | 299 | typedef uint8_t UTF8; | 
| Steve Naroff | 0e3e3eb | 2009-03-30 23:46:03 +0000 | [diff] [blame] | 300 |  | 
|  | 301 | unsigned short bytesToWrite = 0; | 
|  | 302 | if (UcnVal < (UTF32)0x80) | 
|  | 303 | bytesToWrite = 1; | 
|  | 304 | else if (UcnVal < (UTF32)0x800) | 
|  | 305 | bytesToWrite = 2; | 
|  | 306 | else if (UcnVal < (UTF32)0x10000) | 
|  | 307 | bytesToWrite = 3; | 
|  | 308 | else | 
|  | 309 | bytesToWrite = 4; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 310 |  | 
| Steve Naroff | 0e3e3eb | 2009-03-30 23:46:03 +0000 | [diff] [blame] | 311 | const unsigned byteMask = 0xBF; | 
|  | 312 | const unsigned byteMark = 0x80; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 313 |  | 
| Steve Naroff | 0e3e3eb | 2009-03-30 23:46:03 +0000 | [diff] [blame] | 314 | // Once the bits are split out into bytes of UTF8, this is a mask OR-ed | 
| Steve Naroff | 8a5c0cd | 2009-03-31 10:29:45 +0000 | [diff] [blame] | 315 | // into the first byte, depending on how many bytes follow. | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 316 | static const UTF8 firstByteMark[5] = { | 
| Steve Naroff | 8a5c0cd | 2009-03-31 10:29:45 +0000 | [diff] [blame] | 317 | 0x00, 0x00, 0xC0, 0xE0, 0xF0 | 
| Steve Naroff | 0e3e3eb | 2009-03-30 23:46:03 +0000 | [diff] [blame] | 318 | }; | 
|  | 319 | // Finally, we write the bytes into ResultBuf. | 
|  | 320 | ResultBuf += bytesToWrite; | 
|  | 321 | switch (bytesToWrite) { // note: everything falls through. | 
|  | 322 | case 4: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6; | 
|  | 323 | case 3: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6; | 
|  | 324 | case 2: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6; | 
|  | 325 | case 1: *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]); | 
|  | 326 | } | 
|  | 327 | // Update the buffer. | 
|  | 328 | ResultBuf += bytesToWrite; | 
|  | 329 | } | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 330 |  | 
|  | 331 |  | 
|  | 332 | ///       integer-constant: [C99 6.4.4.1] | 
|  | 333 | ///         decimal-constant integer-suffix | 
|  | 334 | ///         octal-constant integer-suffix | 
|  | 335 | ///         hexadecimal-constant integer-suffix | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 336 | ///       decimal-constant: | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 337 | ///         nonzero-digit | 
|  | 338 | ///         decimal-constant digit | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 339 | ///       octal-constant: | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 340 | ///         0 | 
|  | 341 | ///         octal-constant octal-digit | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 342 | ///       hexadecimal-constant: | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 343 | ///         hexadecimal-prefix hexadecimal-digit | 
|  | 344 | ///         hexadecimal-constant hexadecimal-digit | 
|  | 345 | ///       hexadecimal-prefix: one of | 
|  | 346 | ///         0x 0X | 
|  | 347 | ///       integer-suffix: | 
|  | 348 | ///         unsigned-suffix [long-suffix] | 
|  | 349 | ///         unsigned-suffix [long-long-suffix] | 
|  | 350 | ///         long-suffix [unsigned-suffix] | 
|  | 351 | ///         long-long-suffix [unsigned-suffix] | 
|  | 352 | ///       nonzero-digit: | 
|  | 353 | ///         1 2 3 4 5 6 7 8 9 | 
|  | 354 | ///       octal-digit: | 
|  | 355 | ///         0 1 2 3 4 5 6 7 | 
|  | 356 | ///       hexadecimal-digit: | 
|  | 357 | ///         0 1 2 3 4 5 6 7 8 9 | 
|  | 358 | ///         a b c d e f | 
|  | 359 | ///         A B C D E F | 
|  | 360 | ///       unsigned-suffix: one of | 
|  | 361 | ///         u U | 
|  | 362 | ///       long-suffix: one of | 
|  | 363 | ///         l L | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 364 | ///       long-long-suffix: one of | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 365 | ///         ll LL | 
|  | 366 | /// | 
|  | 367 | ///       floating-constant: [C99 6.4.4.2] | 
|  | 368 | ///         TODO: add rules... | 
|  | 369 | /// | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 370 | NumericLiteralParser:: | 
|  | 371 | NumericLiteralParser(const char *begin, const char *end, | 
|  | 372 | SourceLocation TokLoc, Preprocessor &pp) | 
|  | 373 | : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) { | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 374 |  | 
| Chris Lattner | c29bbde | 2008-09-30 20:45:40 +0000 | [diff] [blame] | 375 | // This routine assumes that the range begin/end matches the regex for integer | 
|  | 376 | // and FP constants (specifically, the 'pp-number' regex), and assumes that | 
|  | 377 | // the byte at "*end" is both valid and not part of the regex.  Because of | 
|  | 378 | // this, it doesn't have to check for 'overscan' in various places. | 
|  | 379 | assert(!isalnum(*end) && *end != '.' && *end != '_' && | 
|  | 380 | "Lexer didn't maximally munch?"); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 381 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 382 | s = DigitsBegin = begin; | 
|  | 383 | saw_exponent = false; | 
|  | 384 | saw_period = false; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 385 | isLong = false; | 
|  | 386 | isUnsigned = false; | 
|  | 387 | isLongLong = false; | 
| Chris Lattner | 6e400c2 | 2007-08-26 03:29:23 +0000 | [diff] [blame] | 388 | isFloat = false; | 
| Chris Lattner | 506b8de | 2007-08-26 01:58:14 +0000 | [diff] [blame] | 389 | isImaginary = false; | 
| Mike Stump | b79fe2d | 2009-10-08 22:55:36 +0000 | [diff] [blame] | 390 | isMicrosoftInteger = false; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 391 | hadError = false; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 392 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 393 | if (*s == '0') { // parse radix | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 394 | ParseNumberStartingWithZero(TokLoc); | 
|  | 395 | if (hadError) | 
|  | 396 | return; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 397 | } else { // the first digit is non-zero | 
|  | 398 | radix = 10; | 
|  | 399 | s = SkipDigits(s); | 
|  | 400 | if (s == ThisTokEnd) { | 
|  | 401 | // Done. | 
| Christopher Lamb | 016765e | 2007-11-29 06:06:27 +0000 | [diff] [blame] | 402 | } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) { | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 403 | PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin), | 
| Chris Lattner | 5f9e272 | 2011-07-23 10:55:15 +0000 | [diff] [blame] | 404 | diag::err_invalid_decimal_digit) << StringRef(s, 1); | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 405 | hadError = true; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 406 | return; | 
|  | 407 | } else if (*s == '.') { | 
|  | 408 | s++; | 
|  | 409 | saw_period = true; | 
|  | 410 | s = SkipDigits(s); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 411 | } | 
| Chris Lattner | 4411f46 | 2008-09-29 23:12:31 +0000 | [diff] [blame] | 412 | if ((*s == 'e' || *s == 'E')) { // exponent | 
| Chris Lattner | 70f66ab | 2008-04-20 18:47:55 +0000 | [diff] [blame] | 413 | const char *Exponent = s; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 414 | s++; | 
|  | 415 | saw_exponent = true; | 
|  | 416 | if (*s == '+' || *s == '-')  s++; // sign | 
|  | 417 | const char *first_non_digit = SkipDigits(s); | 
| Chris Lattner | 0b7f69d | 2008-04-20 18:41:46 +0000 | [diff] [blame] | 418 | if (first_non_digit != s) { | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 419 | s = first_non_digit; | 
| Chris Lattner | 0b7f69d | 2008-04-20 18:41:46 +0000 | [diff] [blame] | 420 | } else { | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 421 | PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-begin), | 
|  | 422 | diag::err_exponent_has_no_digits); | 
|  | 423 | hadError = true; | 
| Chris Lattner | 0b7f69d | 2008-04-20 18:41:46 +0000 | [diff] [blame] | 424 | return; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 425 | } | 
|  | 426 | } | 
|  | 427 | } | 
|  | 428 |  | 
|  | 429 | SuffixBegin = s; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 430 |  | 
| Chris Lattner | 506b8de | 2007-08-26 01:58:14 +0000 | [diff] [blame] | 431 | // Parse the suffix.  At this point we can classify whether we have an FP or | 
|  | 432 | // integer constant. | 
|  | 433 | bool isFPConstant = isFloatingLiteral(); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 434 |  | 
| Chris Lattner | 506b8de | 2007-08-26 01:58:14 +0000 | [diff] [blame] | 435 | // Loop over all of the characters of the suffix.  If we see something bad, | 
|  | 436 | // we break out of the loop. | 
|  | 437 | for (; s != ThisTokEnd; ++s) { | 
|  | 438 | switch (*s) { | 
|  | 439 | case 'f':      // FP Suffix for "float" | 
|  | 440 | case 'F': | 
|  | 441 | if (!isFPConstant) break;  // Error for integer constant. | 
| Chris Lattner | 6e400c2 | 2007-08-26 03:29:23 +0000 | [diff] [blame] | 442 | if (isFloat || isLong) break; // FF, LF invalid. | 
|  | 443 | isFloat = true; | 
| Chris Lattner | 506b8de | 2007-08-26 01:58:14 +0000 | [diff] [blame] | 444 | continue;  // Success. | 
|  | 445 | case 'u': | 
|  | 446 | case 'U': | 
|  | 447 | if (isFPConstant) break;  // Error for floating constant. | 
|  | 448 | if (isUnsigned) break;    // Cannot be repeated. | 
|  | 449 | isUnsigned = true; | 
|  | 450 | continue;  // Success. | 
|  | 451 | case 'l': | 
|  | 452 | case 'L': | 
|  | 453 | if (isLong || isLongLong) break;  // Cannot be repeated. | 
| Chris Lattner | 6e400c2 | 2007-08-26 03:29:23 +0000 | [diff] [blame] | 454 | if (isFloat) break;               // LF invalid. | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 455 |  | 
| Chris Lattner | 506b8de | 2007-08-26 01:58:14 +0000 | [diff] [blame] | 456 | // Check for long long.  The L's need to be adjacent and the same case. | 
|  | 457 | if (s+1 != ThisTokEnd && s[1] == s[0]) { | 
|  | 458 | if (isFPConstant) break;        // long long invalid for floats. | 
|  | 459 | isLongLong = true; | 
|  | 460 | ++s;  // Eat both of them. | 
|  | 461 | } else { | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 462 | isLong = true; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 463 | } | 
| Chris Lattner | 506b8de | 2007-08-26 01:58:14 +0000 | [diff] [blame] | 464 | continue;  // Success. | 
|  | 465 | case 'i': | 
| Chris Lattner | c637415 | 2010-10-14 00:24:10 +0000 | [diff] [blame] | 466 | case 'I': | 
| Francois Pichet | 62ec1f2 | 2011-09-17 17:15:52 +0000 | [diff] [blame] | 467 | if (PP.getLangOptions().MicrosoftExt) { | 
| Fariborz Jahanian | a8be02b | 2010-01-22 21:36:53 +0000 | [diff] [blame] | 468 | if (isFPConstant || isLong || isLongLong) break; | 
| Nuno Lopes | 6e8c7ac | 2009-11-28 13:37:52 +0000 | [diff] [blame] | 469 |  | 
| Steve Naroff | 0c29b22 | 2008-04-04 21:02:54 +0000 | [diff] [blame] | 470 | // Allow i8, i16, i32, i64, and i128. | 
| Mike Stump | b79fe2d | 2009-10-08 22:55:36 +0000 | [diff] [blame] | 471 | if (s + 1 != ThisTokEnd) { | 
|  | 472 | switch (s[1]) { | 
|  | 473 | case '8': | 
|  | 474 | s += 2; // i8 suffix | 
|  | 475 | isMicrosoftInteger = true; | 
| Nuno Lopes | 6e8c7ac | 2009-11-28 13:37:52 +0000 | [diff] [blame] | 476 | break; | 
| Mike Stump | b79fe2d | 2009-10-08 22:55:36 +0000 | [diff] [blame] | 477 | case '1': | 
| Nuno Lopes | 6e8c7ac | 2009-11-28 13:37:52 +0000 | [diff] [blame] | 478 | if (s + 2 == ThisTokEnd) break; | 
| Francois Pichet | d062b60 | 2011-01-11 11:57:53 +0000 | [diff] [blame] | 479 | if (s[2] == '6') { | 
|  | 480 | s += 3; // i16 suffix | 
|  | 481 | isMicrosoftInteger = true; | 
|  | 482 | } | 
| Nuno Lopes | 6e8c7ac | 2009-11-28 13:37:52 +0000 | [diff] [blame] | 483 | else if (s[2] == '2') { | 
|  | 484 | if (s + 3 == ThisTokEnd) break; | 
| Francois Pichet | d062b60 | 2011-01-11 11:57:53 +0000 | [diff] [blame] | 485 | if (s[3] == '8') { | 
|  | 486 | s += 4; // i128 suffix | 
|  | 487 | isMicrosoftInteger = true; | 
|  | 488 | } | 
| Mike Stump | b79fe2d | 2009-10-08 22:55:36 +0000 | [diff] [blame] | 489 | } | 
| Nuno Lopes | 6e8c7ac | 2009-11-28 13:37:52 +0000 | [diff] [blame] | 490 | break; | 
| Mike Stump | b79fe2d | 2009-10-08 22:55:36 +0000 | [diff] [blame] | 491 | case '3': | 
| Nuno Lopes | 6e8c7ac | 2009-11-28 13:37:52 +0000 | [diff] [blame] | 492 | if (s + 2 == ThisTokEnd) break; | 
| Francois Pichet | d062b60 | 2011-01-11 11:57:53 +0000 | [diff] [blame] | 493 | if (s[2] == '2') { | 
|  | 494 | s += 3; // i32 suffix | 
|  | 495 | isLong = true; | 
|  | 496 | isMicrosoftInteger = true; | 
|  | 497 | } | 
| Nuno Lopes | 6e8c7ac | 2009-11-28 13:37:52 +0000 | [diff] [blame] | 498 | break; | 
| Mike Stump | b79fe2d | 2009-10-08 22:55:36 +0000 | [diff] [blame] | 499 | case '6': | 
| Nuno Lopes | 6e8c7ac | 2009-11-28 13:37:52 +0000 | [diff] [blame] | 500 | if (s + 2 == ThisTokEnd) break; | 
| Francois Pichet | d062b60 | 2011-01-11 11:57:53 +0000 | [diff] [blame] | 501 | if (s[2] == '4') { | 
|  | 502 | s += 3; // i64 suffix | 
|  | 503 | isLongLong = true; | 
|  | 504 | isMicrosoftInteger = true; | 
|  | 505 | } | 
| Nuno Lopes | 6e8c7ac | 2009-11-28 13:37:52 +0000 | [diff] [blame] | 506 | break; | 
| Mike Stump | b79fe2d | 2009-10-08 22:55:36 +0000 | [diff] [blame] | 507 | default: | 
|  | 508 | break; | 
|  | 509 | } | 
|  | 510 | break; | 
| Steve Naroff | 0c29b22 | 2008-04-04 21:02:54 +0000 | [diff] [blame] | 511 | } | 
| Steve Naroff | 0c29b22 | 2008-04-04 21:02:54 +0000 | [diff] [blame] | 512 | } | 
|  | 513 | // fall through. | 
| Chris Lattner | 506b8de | 2007-08-26 01:58:14 +0000 | [diff] [blame] | 514 | case 'j': | 
|  | 515 | case 'J': | 
|  | 516 | if (isImaginary) break;   // Cannot be repeated. | 
|  | 517 | PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin), | 
|  | 518 | diag::ext_imaginary_constant); | 
|  | 519 | isImaginary = true; | 
|  | 520 | continue;  // Success. | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 521 | } | 
| Chris Lattner | 506b8de | 2007-08-26 01:58:14 +0000 | [diff] [blame] | 522 | // If we reached here, there was an error. | 
|  | 523 | break; | 
|  | 524 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 525 |  | 
| Chris Lattner | 506b8de | 2007-08-26 01:58:14 +0000 | [diff] [blame] | 526 | // Report an error if there are any. | 
|  | 527 | if (s != ThisTokEnd) { | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 528 | PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin), | 
|  | 529 | isFPConstant ? diag::err_invalid_suffix_float_constant : | 
|  | 530 | diag::err_invalid_suffix_integer_constant) | 
| Chris Lattner | 5f9e272 | 2011-07-23 10:55:15 +0000 | [diff] [blame] | 531 | << StringRef(SuffixBegin, ThisTokEnd-SuffixBegin); | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 532 | hadError = true; | 
| Chris Lattner | 506b8de | 2007-08-26 01:58:14 +0000 | [diff] [blame] | 533 | return; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 534 | } | 
|  | 535 | } | 
|  | 536 |  | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 537 | /// ParseNumberStartingWithZero - This method is called when the first character | 
|  | 538 | /// of the number is found to be a zero.  This means it is either an octal | 
|  | 539 | /// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 540 | /// a floating point number (01239.123e4).  Eat the prefix, determining the | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 541 | /// radix etc. | 
|  | 542 | void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { | 
|  | 543 | assert(s[0] == '0' && "Invalid method call"); | 
|  | 544 | s++; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 545 |  | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 546 | // Handle a hex number like 0x1234. | 
|  | 547 | if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) { | 
|  | 548 | s++; | 
|  | 549 | radix = 16; | 
|  | 550 | DigitsBegin = s; | 
|  | 551 | s = SkipHexDigits(s); | 
| Aaron Ballman | 66b0eba | 2012-02-08 13:36:33 +0000 | [diff] [blame] | 552 | bool noSignificand = (s == DigitsBegin); | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 553 | if (s == ThisTokEnd) { | 
|  | 554 | // Done. | 
|  | 555 | } else if (*s == '.') { | 
|  | 556 | s++; | 
|  | 557 | saw_period = true; | 
| Aaron Ballman | 66b0eba | 2012-02-08 13:36:33 +0000 | [diff] [blame] | 558 | const char *floatDigitsBegin = s; | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 559 | s = SkipHexDigits(s); | 
| Aaron Ballman | 66b0eba | 2012-02-08 13:36:33 +0000 | [diff] [blame] | 560 | noSignificand &= (floatDigitsBegin == s); | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 561 | } | 
| Aaron Ballman | 66b0eba | 2012-02-08 13:36:33 +0000 | [diff] [blame] | 562 |  | 
|  | 563 | if (noSignificand) { | 
|  | 564 | PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), \ | 
|  | 565 | diag::err_hexconstant_requires_digits); | 
|  | 566 | hadError = true; | 
|  | 567 | return; | 
|  | 568 | } | 
|  | 569 |  | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 570 | // A binary exponent can appear with or with a '.'. If dotted, the | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 571 | // binary exponent is required. | 
| Douglas Gregor | 1155c42 | 2011-08-30 22:40:35 +0000 | [diff] [blame] | 572 | if (*s == 'p' || *s == 'P') { | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 573 | const char *Exponent = s; | 
|  | 574 | s++; | 
|  | 575 | saw_exponent = true; | 
|  | 576 | if (*s == '+' || *s == '-')  s++; // sign | 
|  | 577 | const char *first_non_digit = SkipDigits(s); | 
| Chris Lattner | 6ea6238 | 2008-07-25 18:18:34 +0000 | [diff] [blame] | 578 | if (first_non_digit == s) { | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 579 | PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin), | 
|  | 580 | diag::err_exponent_has_no_digits); | 
|  | 581 | hadError = true; | 
| Chris Lattner | 6ea6238 | 2008-07-25 18:18:34 +0000 | [diff] [blame] | 582 | return; | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 583 | } | 
| Chris Lattner | 6ea6238 | 2008-07-25 18:18:34 +0000 | [diff] [blame] | 584 | s = first_non_digit; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 585 |  | 
| Douglas Gregor | 1155c42 | 2011-08-30 22:40:35 +0000 | [diff] [blame] | 586 | if (!PP.getLangOptions().HexFloats) | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 587 | PP.Diag(TokLoc, diag::ext_hexconstant_invalid); | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 588 | } else if (saw_period) { | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 589 | PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), | 
|  | 590 | diag::err_hexconstant_requires_exponent); | 
|  | 591 | hadError = true; | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 592 | } | 
|  | 593 | return; | 
|  | 594 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 595 |  | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 596 | // Handle simple binary numbers 0b01010 | 
|  | 597 | if (*s == 'b' || *s == 'B') { | 
|  | 598 | // 0b101010 is a GCC extension. | 
| Chris Lattner | 413d355 | 2008-06-30 06:44:49 +0000 | [diff] [blame] | 599 | PP.Diag(TokLoc, diag::ext_binary_literal); | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 600 | ++s; | 
|  | 601 | radix = 2; | 
|  | 602 | DigitsBegin = s; | 
|  | 603 | s = SkipBinaryDigits(s); | 
|  | 604 | if (s == ThisTokEnd) { | 
|  | 605 | // Done. | 
|  | 606 | } else if (isxdigit(*s)) { | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 607 | PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), | 
| Chris Lattner | 5f9e272 | 2011-07-23 10:55:15 +0000 | [diff] [blame] | 608 | diag::err_invalid_binary_digit) << StringRef(s, 1); | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 609 | hadError = true; | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 610 | } | 
| Chris Lattner | 413d355 | 2008-06-30 06:44:49 +0000 | [diff] [blame] | 611 | // Other suffixes will be diagnosed by the caller. | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 612 | return; | 
|  | 613 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 614 |  | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 615 | // For now, the radix is set to 8. If we discover that we have a | 
|  | 616 | // floating point constant, the radix will change to 10. Octal floating | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 617 | // point constants are not permitted (only decimal and hexadecimal). | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 618 | radix = 8; | 
|  | 619 | DigitsBegin = s; | 
|  | 620 | s = SkipOctalDigits(s); | 
|  | 621 | if (s == ThisTokEnd) | 
|  | 622 | return; // Done, simple octal number like 01234 | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 623 |  | 
| Chris Lattner | 413d355 | 2008-06-30 06:44:49 +0000 | [diff] [blame] | 624 | // If we have some other non-octal digit that *is* a decimal digit, see if | 
|  | 625 | // this is part of a floating point number like 094.123 or 09e1. | 
|  | 626 | if (isdigit(*s)) { | 
|  | 627 | const char *EndDecimal = SkipDigits(s); | 
|  | 628 | if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') { | 
|  | 629 | s = EndDecimal; | 
|  | 630 | radix = 10; | 
|  | 631 | } | 
|  | 632 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 633 |  | 
| Chris Lattner | 413d355 | 2008-06-30 06:44:49 +0000 | [diff] [blame] | 634 | // If we have a hex digit other than 'e' (which denotes a FP exponent) then | 
|  | 635 | // the code is using an incorrect base. | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 636 | if (isxdigit(*s) && *s != 'e' && *s != 'E') { | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 637 | PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), | 
| Chris Lattner | 5f9e272 | 2011-07-23 10:55:15 +0000 | [diff] [blame] | 638 | diag::err_invalid_octal_digit) << StringRef(s, 1); | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 639 | hadError = true; | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 640 | return; | 
|  | 641 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 642 |  | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 643 | if (*s == '.') { | 
|  | 644 | s++; | 
|  | 645 | radix = 10; | 
|  | 646 | saw_period = true; | 
| Chris Lattner | 413d355 | 2008-06-30 06:44:49 +0000 | [diff] [blame] | 647 | s = SkipDigits(s); // Skip suffix. | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 648 | } | 
|  | 649 | if (*s == 'e' || *s == 'E') { // exponent | 
|  | 650 | const char *Exponent = s; | 
|  | 651 | s++; | 
|  | 652 | radix = 10; | 
|  | 653 | saw_exponent = true; | 
|  | 654 | if (*s == '+' || *s == '-')  s++; // sign | 
|  | 655 | const char *first_non_digit = SkipDigits(s); | 
|  | 656 | if (first_non_digit != s) { | 
|  | 657 | s = first_non_digit; | 
|  | 658 | } else { | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 659 | PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin), | 
| Chris Lattner | ac92d82 | 2008-11-22 07:23:31 +0000 | [diff] [blame] | 660 | diag::err_exponent_has_no_digits); | 
|  | 661 | hadError = true; | 
| Chris Lattner | 368328c | 2008-06-30 06:39:54 +0000 | [diff] [blame] | 662 | return; | 
|  | 663 | } | 
|  | 664 | } | 
|  | 665 | } | 
|  | 666 |  | 
|  | 667 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 668 | /// GetIntegerValue - Convert this numeric literal value to an APInt that | 
|  | 669 | /// matches Val's input width.  If there is an overflow, set Val to the low bits | 
|  | 670 | /// of the result and return true.  Otherwise, return false. | 
|  | 671 | bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) { | 
| Daniel Dunbar | a179be3 | 2008-10-16 07:32:01 +0000 | [diff] [blame] | 672 | // Fast path: Compute a conservative bound on the maximum number of | 
|  | 673 | // bits per digit in this radix. If we can't possibly overflow a | 
|  | 674 | // uint64 based on that bound then do the simple conversion to | 
|  | 675 | // integer. This avoids the expensive overflow checking below, and | 
|  | 676 | // handles the common cases that matter (small decimal integers and | 
|  | 677 | // hex/octal values which don't overflow). | 
|  | 678 | unsigned MaxBitsPerDigit = 1; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 679 | while ((1U << MaxBitsPerDigit) < radix) | 
| Daniel Dunbar | a179be3 | 2008-10-16 07:32:01 +0000 | [diff] [blame] | 680 | MaxBitsPerDigit += 1; | 
|  | 681 | if ((SuffixBegin - DigitsBegin) * MaxBitsPerDigit <= 64) { | 
|  | 682 | uint64_t N = 0; | 
|  | 683 | for (s = DigitsBegin; s != SuffixBegin; ++s) | 
|  | 684 | N = N*radix + HexDigitValue(*s); | 
|  | 685 |  | 
|  | 686 | // This will truncate the value to Val's input width. Simply check | 
|  | 687 | // for overflow by comparing. | 
|  | 688 | Val = N; | 
|  | 689 | return Val.getZExtValue() != N; | 
|  | 690 | } | 
|  | 691 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 692 | Val = 0; | 
|  | 693 | s = DigitsBegin; | 
|  | 694 |  | 
|  | 695 | llvm::APInt RadixVal(Val.getBitWidth(), radix); | 
|  | 696 | llvm::APInt CharVal(Val.getBitWidth(), 0); | 
|  | 697 | llvm::APInt OldVal = Val; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 698 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 699 | bool OverflowOccurred = false; | 
|  | 700 | while (s < SuffixBegin) { | 
|  | 701 | unsigned C = HexDigitValue(*s++); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 702 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 703 | // If this letter is out of bound for this radix, reject it. | 
|  | 704 | assert(C < radix && "NumericLiteralParser ctor should have rejected this"); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 705 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 706 | CharVal = C; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 707 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 708 | // Add the digit to the value in the appropriate radix.  If adding in digits | 
|  | 709 | // made the value smaller, then this overflowed. | 
|  | 710 | OldVal = Val; | 
|  | 711 |  | 
|  | 712 | // Multiply by radix, did overflow occur on the multiply? | 
|  | 713 | Val *= RadixVal; | 
|  | 714 | OverflowOccurred |= Val.udiv(RadixVal) != OldVal; | 
|  | 715 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 716 | // Add value, did overflow occur on the value? | 
| Daniel Dunbar | d70cb64 | 2008-10-16 06:39:30 +0000 | [diff] [blame] | 717 | //   (a + b) ult b  <=> overflow | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 718 | Val += CharVal; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 719 | OverflowOccurred |= Val.ult(CharVal); | 
|  | 720 | } | 
|  | 721 | return OverflowOccurred; | 
|  | 722 | } | 
|  | 723 |  | 
| John McCall | 94c939d | 2009-12-24 09:08:04 +0000 | [diff] [blame] | 724 | llvm::APFloat::opStatus | 
|  | 725 | NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { | 
| Ted Kremenek | 427d5af | 2007-11-26 23:12:30 +0000 | [diff] [blame] | 726 | using llvm::APFloat; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 727 |  | 
| Erick Tryzelaar | e9f195f | 2009-08-16 23:36:28 +0000 | [diff] [blame] | 728 | unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin); | 
| John McCall | 94c939d | 2009-12-24 09:08:04 +0000 | [diff] [blame] | 729 | return Result.convertFromString(StringRef(ThisTokBegin, n), | 
|  | 730 | APFloat::rmNearestTiesToEven); | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 731 | } | 
|  | 732 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 733 |  | 
| Richard Smith | 5cc2c6e | 2012-03-05 04:02:15 +0000 | [diff] [blame^] | 734 | ///       user-defined-character-literal: [C++11 lex.ext] | 
|  | 735 | ///         character-literal ud-suffix | 
|  | 736 | ///       ud-suffix: | 
|  | 737 | ///         identifier | 
|  | 738 | ///       character-literal: [C++11 lex.ccon] | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 739 | ///         ' c-char-sequence ' | 
|  | 740 | ///         u' c-char-sequence ' | 
|  | 741 | ///         U' c-char-sequence ' | 
|  | 742 | ///         L' c-char-sequence ' | 
|  | 743 | ///       c-char-sequence: | 
|  | 744 | ///         c-char | 
|  | 745 | ///         c-char-sequence c-char | 
|  | 746 | ///       c-char: | 
|  | 747 | ///         any member of the source character set except the single-quote ', | 
|  | 748 | ///           backslash \, or new-line character | 
|  | 749 | ///         escape-sequence | 
|  | 750 | ///         universal-character-name | 
| Richard Smith | 5cc2c6e | 2012-03-05 04:02:15 +0000 | [diff] [blame^] | 751 | ///       escape-sequence: | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 752 | ///         simple-escape-sequence | 
|  | 753 | ///         octal-escape-sequence | 
|  | 754 | ///         hexadecimal-escape-sequence | 
|  | 755 | ///       simple-escape-sequence: | 
| NAKAMURA Takumi | ddddd48 | 2011-08-12 05:49:51 +0000 | [diff] [blame] | 756 | ///         one of \' \" \? \\ \a \b \f \n \r \t \v | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 757 | ///       octal-escape-sequence: | 
|  | 758 | ///         \ octal-digit | 
|  | 759 | ///         \ octal-digit octal-digit | 
|  | 760 | ///         \ octal-digit octal-digit octal-digit | 
|  | 761 | ///       hexadecimal-escape-sequence: | 
|  | 762 | ///         \x hexadecimal-digit | 
|  | 763 | ///         hexadecimal-escape-sequence hexadecimal-digit | 
| Richard Smith | 5cc2c6e | 2012-03-05 04:02:15 +0000 | [diff] [blame^] | 764 | ///       universal-character-name: [C++11 lex.charset] | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 765 | ///         \u hex-quad | 
|  | 766 | ///         \U hex-quad hex-quad | 
|  | 767 | ///       hex-quad: | 
|  | 768 | ///         hex-digit hex-digit hex-digit hex-digit | 
|  | 769 | /// | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 770 | CharLiteralParser::CharLiteralParser(const char *begin, const char *end, | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 771 | SourceLocation Loc, Preprocessor &PP, | 
|  | 772 | tok::TokenKind kind) { | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 773 | // At this point we know that the character matches the regex "(L|u|U)?'.*'". | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 774 | HadError = false; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 775 |  | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 776 | Kind = kind; | 
|  | 777 |  | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 778 | // Skip over wide character determinant. | 
|  | 779 | if (Kind != tok::char_constant) { | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 780 | ++begin; | 
|  | 781 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 782 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 783 | // Skip over the entry quote. | 
|  | 784 | assert(begin[0] == '\'' && "Invalid token lexed"); | 
|  | 785 | ++begin; | 
|  | 786 |  | 
| Richard Smith | 5cc2c6e | 2012-03-05 04:02:15 +0000 | [diff] [blame^] | 787 | // Remove an optional ud-suffix. | 
|  | 788 | if (end[-1] != '\'') { | 
|  | 789 | const char *UDSuffixEnd = end; | 
|  | 790 | do { | 
|  | 791 | --end; | 
|  | 792 | } while (end[-1] != '\''); | 
|  | 793 | UDSuffixBuf.assign(end, UDSuffixEnd); | 
|  | 794 | } | 
|  | 795 |  | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 796 | // Trim the ending quote. | 
| Richard Smith | 5cc2c6e | 2012-03-05 04:02:15 +0000 | [diff] [blame^] | 797 | assert(end != begin && "Invalid token lexed"); | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 798 | --end; | 
|  | 799 |  | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 800 | // FIXME: The "Value" is an uint64_t so we can handle char literals of | 
| Chris Lattner | fc8f0e1 | 2011-04-15 05:22:18 +0000 | [diff] [blame] | 801 | // up to 64-bits. | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 802 | // FIXME: This extensively assumes that 'char' is 8-bits. | 
| Chris Lattner | 98be494 | 2008-03-05 18:54:05 +0000 | [diff] [blame] | 803 | assert(PP.getTargetInfo().getCharWidth() == 8 && | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 804 | "Assumes char is 8 bits"); | 
| Chris Lattner | e3ad881 | 2009-04-28 21:51:46 +0000 | [diff] [blame] | 805 | assert(PP.getTargetInfo().getIntWidth() <= 64 && | 
|  | 806 | (PP.getTargetInfo().getIntWidth() & 7) == 0 && | 
|  | 807 | "Assumes sizeof(int) on target is <= 64 and a multiple of char"); | 
|  | 808 | assert(PP.getTargetInfo().getWCharWidth() <= 64 && | 
|  | 809 | "Assumes sizeof(wchar) on target is <= 64"); | 
| Sanjiv Gupta | 4bc11af | 2009-04-21 02:21:29 +0000 | [diff] [blame] | 810 |  | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 811 | SmallVector<uint32_t,4> codepoint_buffer; | 
|  | 812 | codepoint_buffer.resize(end-begin); | 
|  | 813 | uint32_t *buffer_begin = &codepoint_buffer.front(); | 
|  | 814 | uint32_t *buffer_end = buffer_begin + codepoint_buffer.size(); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 815 |  | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 816 | // Unicode escapes representing characters that cannot be correctly | 
|  | 817 | // represented in a single code unit are disallowed in character literals | 
|  | 818 | // by this implementation. | 
|  | 819 | uint32_t largest_character_for_kind; | 
|  | 820 | if (tok::wide_char_constant == Kind) { | 
|  | 821 | largest_character_for_kind = 0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth()); | 
|  | 822 | } else if (tok::utf16_char_constant == Kind) { | 
|  | 823 | largest_character_for_kind = 0xFFFF; | 
|  | 824 | } else if (tok::utf32_char_constant == Kind) { | 
|  | 825 | largest_character_for_kind = 0x10FFFF; | 
|  | 826 | } else { | 
|  | 827 | largest_character_for_kind = 0x7Fu; | 
| Chris Lattner | e3ad881 | 2009-04-28 21:51:46 +0000 | [diff] [blame] | 828 | } | 
|  | 829 |  | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 830 | while (begin!=end) { | 
|  | 831 | // Is this a span of non-escape characters? | 
|  | 832 | if (begin[0] != '\\') { | 
|  | 833 | char const *start = begin; | 
|  | 834 | do { | 
|  | 835 | ++begin; | 
|  | 836 | } while (begin != end && *begin != '\\'); | 
|  | 837 |  | 
| Eli Friedman | 9135930 | 2012-02-11 05:08:10 +0000 | [diff] [blame] | 838 | char const *tmp_in_start = start; | 
|  | 839 | uint32_t *tmp_out_start = buffer_begin; | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 840 | ConversionResult res = | 
|  | 841 | ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start), | 
|  | 842 | reinterpret_cast<UTF8 const *>(begin), | 
|  | 843 | &buffer_begin,buffer_end,strictConversion); | 
|  | 844 | if (res!=conversionOK) { | 
| Eli Friedman | 9135930 | 2012-02-11 05:08:10 +0000 | [diff] [blame] | 845 | // If we see bad encoding for unprefixed character literals, warn and | 
|  | 846 | // simply copy the byte values, for compatibility with gcc and | 
|  | 847 | // older versions of clang. | 
|  | 848 | bool NoErrorOnBadEncoding = isAscii(); | 
|  | 849 | unsigned Msg = diag::err_bad_character_encoding; | 
|  | 850 | if (NoErrorOnBadEncoding) | 
|  | 851 | Msg = diag::warn_bad_character_encoding; | 
|  | 852 | PP.Diag(Loc, Msg); | 
|  | 853 | if (NoErrorOnBadEncoding) { | 
|  | 854 | start = tmp_in_start; | 
|  | 855 | buffer_begin = tmp_out_start; | 
|  | 856 | for ( ; start != begin; ++start, ++buffer_begin) | 
|  | 857 | *buffer_begin = static_cast<uint8_t>(*start); | 
|  | 858 | } else { | 
|  | 859 | HadError = true; | 
|  | 860 | } | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 861 | } else { | 
| Eli Friedman | 9135930 | 2012-02-11 05:08:10 +0000 | [diff] [blame] | 862 | for (; tmp_out_start <buffer_begin; ++tmp_out_start) { | 
|  | 863 | if (*tmp_out_start > largest_character_for_kind) { | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 864 | HadError = true; | 
|  | 865 | PP.Diag(Loc, diag::err_character_too_large); | 
|  | 866 | } | 
|  | 867 | } | 
|  | 868 | } | 
|  | 869 |  | 
|  | 870 | continue; | 
|  | 871 | } | 
|  | 872 | // Is this a Universal Character Name excape? | 
|  | 873 | if (begin[1] == 'u' || begin[1] == 'U') { | 
|  | 874 | unsigned short UcnLen = 0; | 
|  | 875 | if (!ProcessUCNEscape(begin, end, *buffer_begin, UcnLen, | 
|  | 876 | FullSourceLoc(Loc, PP.getSourceManager()), | 
|  | 877 | &PP.getDiagnostics(), PP.getLangOptions(), | 
|  | 878 | true)) | 
|  | 879 | { | 
|  | 880 | HadError = true; | 
|  | 881 | } else if (*buffer_begin > largest_character_for_kind) { | 
|  | 882 | HadError = true; | 
|  | 883 | PP.Diag(Loc,diag::err_character_too_large); | 
|  | 884 | } | 
|  | 885 |  | 
|  | 886 | ++buffer_begin; | 
|  | 887 | continue; | 
|  | 888 | } | 
|  | 889 | unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo()); | 
|  | 890 | uint64_t result = | 
|  | 891 | ProcessCharEscape(begin, end, HadError, | 
|  | 892 | FullSourceLoc(Loc,PP.getSourceManager()), | 
|  | 893 | CharWidth, &PP.getDiagnostics()); | 
|  | 894 | *buffer_begin++ = result; | 
|  | 895 | } | 
|  | 896 |  | 
|  | 897 | unsigned NumCharsSoFar = buffer_begin-&codepoint_buffer.front(); | 
|  | 898 |  | 
| Chris Lattner | e3ad881 | 2009-04-28 21:51:46 +0000 | [diff] [blame] | 899 | if (NumCharsSoFar > 1) { | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 900 | if (isWide()) | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 901 | PP.Diag(Loc, diag::warn_extraneous_char_constant); | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 902 | else if (isAscii() && NumCharsSoFar == 4) | 
|  | 903 | PP.Diag(Loc, diag::ext_four_char_character_literal); | 
|  | 904 | else if (isAscii()) | 
| Chris Lattner | e3ad881 | 2009-04-28 21:51:46 +0000 | [diff] [blame] | 905 | PP.Diag(Loc, diag::ext_multichar_character_literal); | 
|  | 906 | else | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 907 | PP.Diag(Loc, diag::err_multichar_utf_character_literal); | 
| Eli Friedman | 2a1c363 | 2009-06-01 05:25:02 +0000 | [diff] [blame] | 908 | IsMultiChar = true; | 
| Daniel Dunbar | 930b71a | 2009-07-29 01:46:05 +0000 | [diff] [blame] | 909 | } else | 
|  | 910 | IsMultiChar = false; | 
| Sanjiv Gupta | 4bc11af | 2009-04-21 02:21:29 +0000 | [diff] [blame] | 911 |  | 
| Seth Cantrell | be77352 | 2012-01-18 12:27:04 +0000 | [diff] [blame] | 912 | llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0); | 
|  | 913 |  | 
|  | 914 | // Narrow character literals act as though their value is concatenated | 
|  | 915 | // in this implementation, but warn on overflow. | 
|  | 916 | bool multi_char_too_long = false; | 
|  | 917 | if (isAscii() && isMultiChar()) { | 
|  | 918 | LitVal = 0; | 
|  | 919 | for (size_t i=0;i<NumCharsSoFar;++i) { | 
|  | 920 | // check for enough leading zeros to shift into | 
|  | 921 | multi_char_too_long |= (LitVal.countLeadingZeros() < 8); | 
|  | 922 | LitVal <<= 8; | 
|  | 923 | LitVal = LitVal + (codepoint_buffer[i] & 0xFF); | 
|  | 924 | } | 
|  | 925 | } else if (NumCharsSoFar > 0) { | 
|  | 926 | // otherwise just take the last character | 
|  | 927 | LitVal = buffer_begin[-1]; | 
|  | 928 | } | 
|  | 929 |  | 
|  | 930 | if (!HadError && multi_char_too_long) { | 
|  | 931 | PP.Diag(Loc,diag::warn_char_constant_too_large); | 
|  | 932 | } | 
|  | 933 |  | 
| Sanjiv Gupta | 4bc11af | 2009-04-21 02:21:29 +0000 | [diff] [blame] | 934 | // Transfer the value from APInt to uint64_t | 
|  | 935 | Value = LitVal.getZExtValue(); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 936 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 937 | // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1") | 
|  | 938 | // if 'char' is signed for this target (C99 6.4.4.4p10).  Note that multiple | 
|  | 939 | // character constants are not sign extended in the this implementation: | 
|  | 940 | // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC. | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 941 | if (isAscii() && NumCharsSoFar == 1 && (Value & 128) && | 
| Eli Friedman | 15b9176 | 2009-06-05 07:05:05 +0000 | [diff] [blame] | 942 | PP.getLangOptions().CharIsSigned) | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 943 | Value = (signed char)Value; | 
|  | 944 | } | 
|  | 945 |  | 
|  | 946 |  | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 947 | ///       string-literal: [C++0x lex.string] | 
|  | 948 | ///         encoding-prefix " [s-char-sequence] " | 
|  | 949 | ///         encoding-prefix R raw-string | 
|  | 950 | ///       encoding-prefix: | 
|  | 951 | ///         u8 | 
|  | 952 | ///         u | 
|  | 953 | ///         U | 
|  | 954 | ///         L | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 955 | ///       s-char-sequence: | 
|  | 956 | ///         s-char | 
|  | 957 | ///         s-char-sequence s-char | 
|  | 958 | ///       s-char: | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 959 | ///         any member of the source character set except the double-quote ", | 
|  | 960 | ///           backslash \, or new-line character | 
|  | 961 | ///         escape-sequence | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 962 | ///         universal-character-name | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 963 | ///       raw-string: | 
|  | 964 | ///         " d-char-sequence ( r-char-sequence ) d-char-sequence " | 
|  | 965 | ///       r-char-sequence: | 
|  | 966 | ///         r-char | 
|  | 967 | ///         r-char-sequence r-char | 
|  | 968 | ///       r-char: | 
|  | 969 | ///         any member of the source character set, except a right parenthesis ) | 
|  | 970 | ///           followed by the initial d-char-sequence (which may be empty) | 
|  | 971 | ///           followed by a double quote ". | 
|  | 972 | ///       d-char-sequence: | 
|  | 973 | ///         d-char | 
|  | 974 | ///         d-char-sequence d-char | 
|  | 975 | ///       d-char: | 
|  | 976 | ///         any member of the basic source character set except: | 
|  | 977 | ///           space, the left parenthesis (, the right parenthesis ), | 
|  | 978 | ///           the backslash \, and the control characters representing horizontal | 
|  | 979 | ///           tab, vertical tab, form feed, and newline. | 
|  | 980 | ///       escape-sequence: [C++0x lex.ccon] | 
|  | 981 | ///         simple-escape-sequence | 
|  | 982 | ///         octal-escape-sequence | 
|  | 983 | ///         hexadecimal-escape-sequence | 
|  | 984 | ///       simple-escape-sequence: | 
| NAKAMURA Takumi | ddddd48 | 2011-08-12 05:49:51 +0000 | [diff] [blame] | 985 | ///         one of \' \" \? \\ \a \b \f \n \r \t \v | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 986 | ///       octal-escape-sequence: | 
|  | 987 | ///         \ octal-digit | 
|  | 988 | ///         \ octal-digit octal-digit | 
|  | 989 | ///         \ octal-digit octal-digit octal-digit | 
|  | 990 | ///       hexadecimal-escape-sequence: | 
|  | 991 | ///         \x hexadecimal-digit | 
|  | 992 | ///         hexadecimal-escape-sequence hexadecimal-digit | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 993 | ///       universal-character-name: | 
|  | 994 | ///         \u hex-quad | 
|  | 995 | ///         \U hex-quad hex-quad | 
|  | 996 | ///       hex-quad: | 
|  | 997 | ///         hex-digit hex-digit hex-digit hex-digit | 
|  | 998 | /// | 
|  | 999 | StringLiteralParser:: | 
| Chris Lattner | d217773 | 2007-07-20 16:59:19 +0000 | [diff] [blame] | 1000 | StringLiteralParser(const Token *StringToks, unsigned NumStringToks, | 
| Chris Lattner | 0833dd0 | 2010-11-17 07:21:13 +0000 | [diff] [blame] | 1001 | Preprocessor &PP, bool Complain) | 
|  | 1002 | : SM(PP.getSourceManager()), Features(PP.getLangOptions()), | 
| Argyrios Kyrtzidis | 403de3f | 2011-05-17 22:09:56 +0000 | [diff] [blame] | 1003 | Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0), | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 1004 | MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), | 
|  | 1005 | ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { | 
| Chris Lattner | 0833dd0 | 2010-11-17 07:21:13 +0000 | [diff] [blame] | 1006 | init(StringToks, NumStringToks); | 
|  | 1007 | } | 
|  | 1008 |  | 
|  | 1009 | void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ | 
| Argyrios Kyrtzidis | 403de3f | 2011-05-17 22:09:56 +0000 | [diff] [blame] | 1010 | // The literal token may have come from an invalid source location (e.g. due | 
|  | 1011 | // to a PCH error), in which case the token length will be 0. | 
|  | 1012 | if (NumStringToks == 0 || StringToks[0].getLength() < 2) { | 
|  | 1013 | hadError = true; | 
|  | 1014 | return; | 
|  | 1015 | } | 
|  | 1016 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1017 | // Scan all of the string portions, remember the max individual token length, | 
|  | 1018 | // computing a bound on the concatenated string length, and see whether any | 
|  | 1019 | // piece is a wide-string.  If any of the string portions is a wide-string | 
|  | 1020 | // literal, the result is a wide-string literal [C99 6.4.5p4]. | 
| Argyrios Kyrtzidis | 403de3f | 2011-05-17 22:09:56 +0000 | [diff] [blame] | 1021 | assert(NumStringToks && "expected at least one token"); | 
| Sean Hunt | 6cf7502 | 2010-08-30 17:47:05 +0000 | [diff] [blame] | 1022 | MaxTokenLength = StringToks[0].getLength(); | 
| Argyrios Kyrtzidis | 403de3f | 2011-05-17 22:09:56 +0000 | [diff] [blame] | 1023 | assert(StringToks[0].getLength() >= 2 && "literal token is invalid!"); | 
| Sean Hunt | 6cf7502 | 2010-08-30 17:47:05 +0000 | [diff] [blame] | 1024 | SizeBound = StringToks[0].getLength()-2;  // -2 for "". | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 1025 | Kind = StringToks[0].getKind(); | 
| Sean Hunt | 6cf7502 | 2010-08-30 17:47:05 +0000 | [diff] [blame] | 1026 |  | 
|  | 1027 | hadError = false; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1028 |  | 
|  | 1029 | // Implement Translation Phase #6: concatenation of string literals | 
|  | 1030 | /// (C99 5.1.1.2p1).  The common case is only one string fragment. | 
|  | 1031 | for (unsigned i = 1; i != NumStringToks; ++i) { | 
| Argyrios Kyrtzidis | 403de3f | 2011-05-17 22:09:56 +0000 | [diff] [blame] | 1032 | if (StringToks[i].getLength() < 2) { | 
|  | 1033 | hadError = true; | 
|  | 1034 | return; | 
|  | 1035 | } | 
|  | 1036 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1037 | // The string could be shorter than this if it needs cleaning, but this is a | 
|  | 1038 | // reasonable bound, which is all we need. | 
| Argyrios Kyrtzidis | 403de3f | 2011-05-17 22:09:56 +0000 | [diff] [blame] | 1039 | assert(StringToks[i].getLength() >= 2 && "literal token is invalid!"); | 
| Sean Hunt | 6cf7502 | 2010-08-30 17:47:05 +0000 | [diff] [blame] | 1040 | SizeBound += StringToks[i].getLength()-2;  // -2 for "". | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1041 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1042 | // Remember maximum string piece length. | 
| Sean Hunt | 6cf7502 | 2010-08-30 17:47:05 +0000 | [diff] [blame] | 1043 | if (StringToks[i].getLength() > MaxTokenLength) | 
|  | 1044 | MaxTokenLength = StringToks[i].getLength(); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1045 |  | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 1046 | // Remember if we see any wide or utf-8/16/32 strings. | 
|  | 1047 | // Also check for illegal concatenations. | 
|  | 1048 | if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) { | 
|  | 1049 | if (isAscii()) { | 
|  | 1050 | Kind = StringToks[i].getKind(); | 
|  | 1051 | } else { | 
|  | 1052 | if (Diags) | 
|  | 1053 | Diags->Report(FullSourceLoc(StringToks[i].getLocation(), SM), | 
|  | 1054 | diag::err_unsupported_string_concat); | 
|  | 1055 | hadError = true; | 
|  | 1056 | } | 
|  | 1057 | } | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1058 | } | 
| Chris Lattner | dbb1ecc | 2009-02-26 23:01:51 +0000 | [diff] [blame] | 1059 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1060 | // Include space for the null terminator. | 
|  | 1061 | ++SizeBound; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1062 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1063 | // TODO: K&R warning: "traditional C rejects string constant concatenation" | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1064 |  | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 1065 | // Get the width in bytes of char/wchar_t/char16_t/char32_t | 
|  | 1066 | CharByteWidth = getCharWidth(Kind, Target); | 
|  | 1067 | assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple"); | 
|  | 1068 | CharByteWidth /= 8; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1069 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1070 | // The output buffer size needs to be large enough to hold wide characters. | 
|  | 1071 | // This is a worst-case assumption which basically corresponds to L"" "long". | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 1072 | SizeBound *= CharByteWidth; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1073 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1074 | // Size the temporary buffer to hold the result string data. | 
|  | 1075 | ResultBuf.resize(SizeBound); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1076 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1077 | // Likewise, but for each string piece. | 
| Dylan Noblesmith | f7ccbad | 2012-02-05 02:13:05 +0000 | [diff] [blame] | 1078 | SmallString<512> TokenBuf; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1079 | TokenBuf.resize(MaxTokenLength); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1080 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1081 | // Loop over all the strings, getting their spelling, and expanding them to | 
|  | 1082 | // wide strings as appropriate. | 
|  | 1083 | ResultPtr = &ResultBuf[0];   // Next byte to fill in. | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1084 |  | 
| Anders Carlsson | ee98ac5 | 2007-10-15 02:50:23 +0000 | [diff] [blame] | 1085 | Pascal = false; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1086 |  | 
| Richard Smith | 5cc2c6e | 2012-03-05 04:02:15 +0000 | [diff] [blame^] | 1087 | SourceLocation UDSuffixTokLoc; | 
|  | 1088 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1089 | for (unsigned i = 0, e = NumStringToks; i != e; ++i) { | 
|  | 1090 | const char *ThisTokBuf = &TokenBuf[0]; | 
|  | 1091 | // Get the spelling of the token, which eliminates trigraphs, etc.  We know | 
|  | 1092 | // that ThisTokBuf points to a buffer that is big enough for the whole token | 
|  | 1093 | // and 'spelled' tokens can only shrink. | 
| Douglas Gregor | 50f6af7 | 2010-03-16 05:20:39 +0000 | [diff] [blame] | 1094 | bool StringInvalid = false; | 
| Chris Lattner | 0833dd0 | 2010-11-17 07:21:13 +0000 | [diff] [blame] | 1095 | unsigned ThisTokLen = | 
| Chris Lattner | b060727 | 2010-11-17 07:26:20 +0000 | [diff] [blame] | 1096 | Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features, | 
|  | 1097 | &StringInvalid); | 
| Douglas Gregor | 50f6af7 | 2010-03-16 05:20:39 +0000 | [diff] [blame] | 1098 | if (StringInvalid) { | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 1099 | hadError = true; | 
| Douglas Gregor | 50f6af7 | 2010-03-16 05:20:39 +0000 | [diff] [blame] | 1100 | continue; | 
|  | 1101 | } | 
|  | 1102 |  | 
| Richard Smith | 5cc2c6e | 2012-03-05 04:02:15 +0000 | [diff] [blame^] | 1103 | const char *ThisTokEnd = ThisTokBuf+ThisTokLen; | 
|  | 1104 |  | 
|  | 1105 | // Remove an optional ud-suffix. | 
|  | 1106 | if (ThisTokEnd[-1] != '"') { | 
|  | 1107 | const char *UDSuffixEnd = ThisTokEnd; | 
|  | 1108 | do { | 
|  | 1109 | --ThisTokEnd; | 
|  | 1110 | } while (ThisTokEnd[-1] != '"'); | 
|  | 1111 |  | 
|  | 1112 | StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd); | 
|  | 1113 |  | 
|  | 1114 | if (UDSuffixBuf.empty()) { | 
|  | 1115 | UDSuffixBuf.assign(UDSuffix); | 
|  | 1116 | UDSuffixTokLoc = StringToks[i].getLocation(); | 
|  | 1117 | } else if (!UDSuffixBuf.equals(UDSuffix)) { | 
|  | 1118 | // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the | 
|  | 1119 | // result of a concatenation involving at least one user-defined-string- | 
|  | 1120 | // literal, all the participating user-defined-string-literals shall | 
|  | 1121 | // have the same ud-suffix. | 
|  | 1122 | if (Diags) { | 
|  | 1123 | SourceLocation TokLoc = StringToks[i].getLocation(); | 
|  | 1124 | Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix) | 
|  | 1125 | << UDSuffixBuf << UDSuffix | 
|  | 1126 | << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc) | 
|  | 1127 | << SourceRange(TokLoc, TokLoc); | 
|  | 1128 | } | 
|  | 1129 | hadError = true; | 
|  | 1130 | } | 
|  | 1131 | } | 
|  | 1132 |  | 
|  | 1133 | // Strip the end quote. | 
|  | 1134 | --ThisTokEnd; | 
|  | 1135 |  | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1136 | // TODO: Input character set mapping support. | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1137 |  | 
| Craig Topper | 1661d71 | 2011-08-08 06:10:39 +0000 | [diff] [blame] | 1138 | // Skip marker for wide or unicode strings. | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 1139 | if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') { | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1140 | ++ThisTokBuf; | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 1141 | // Skip 8 of u8 marker for utf8 strings. | 
|  | 1142 | if (ThisTokBuf[0] == '8') | 
|  | 1143 | ++ThisTokBuf; | 
| Fariborz Jahanian | 56bedef | 2010-08-31 23:34:27 +0000 | [diff] [blame] | 1144 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1145 |  | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1146 | // Check for raw string | 
|  | 1147 | if (ThisTokBuf[0] == 'R') { | 
|  | 1148 | ThisTokBuf += 2; // skip R" | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1149 |  | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1150 | const char *Prefix = ThisTokBuf; | 
|  | 1151 | while (ThisTokBuf[0] != '(') | 
| Anders Carlsson | ee98ac5 | 2007-10-15 02:50:23 +0000 | [diff] [blame] | 1152 | ++ThisTokBuf; | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1153 | ++ThisTokBuf; // skip '(' | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1154 |  | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1155 | // remove same number of characters from the end | 
|  | 1156 | if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix)) | 
|  | 1157 | ThisTokEnd -= (ThisTokBuf - Prefix); | 
|  | 1158 |  | 
|  | 1159 | // Copy the string over | 
| Eli Friedman | f74a458 | 2011-11-01 02:14:50 +0000 | [diff] [blame] | 1160 | if (CopyStringFragment(StringRef(ThisTokBuf,ThisTokEnd-ThisTokBuf))) | 
|  | 1161 | { | 
| Eli Friedman | 9135930 | 2012-02-11 05:08:10 +0000 | [diff] [blame] | 1162 | if (DiagnoseBadString(StringToks[i])) | 
|  | 1163 | hadError = true; | 
| Eli Friedman | f74a458 | 2011-11-01 02:14:50 +0000 | [diff] [blame] | 1164 | } | 
|  | 1165 |  | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1166 | } else { | 
|  | 1167 | assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); | 
|  | 1168 | ++ThisTokBuf; // skip " | 
|  | 1169 |  | 
|  | 1170 | // Check if this is a pascal string | 
|  | 1171 | if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd && | 
|  | 1172 | ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') { | 
|  | 1173 |  | 
|  | 1174 | // If the \p sequence is found in the first token, we have a pascal string | 
|  | 1175 | // Otherwise, if we already have a pascal string, ignore the first \p | 
|  | 1176 | if (i == 0) { | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1177 | ++ThisTokBuf; | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1178 | Pascal = true; | 
|  | 1179 | } else if (Pascal) | 
|  | 1180 | ThisTokBuf += 2; | 
|  | 1181 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1182 |  | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1183 | while (ThisTokBuf != ThisTokEnd) { | 
|  | 1184 | // Is this a span of non-escape characters? | 
|  | 1185 | if (ThisTokBuf[0] != '\\') { | 
|  | 1186 | const char *InStart = ThisTokBuf; | 
|  | 1187 | do { | 
|  | 1188 | ++ThisTokBuf; | 
|  | 1189 | } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\'); | 
|  | 1190 |  | 
|  | 1191 | // Copy the character span over. | 
| Eli Friedman | f74a458 | 2011-11-01 02:14:50 +0000 | [diff] [blame] | 1192 | if (CopyStringFragment(StringRef(InStart,ThisTokBuf-InStart))) | 
|  | 1193 | { | 
| Eli Friedman | 9135930 | 2012-02-11 05:08:10 +0000 | [diff] [blame] | 1194 | if (DiagnoseBadString(StringToks[i])) | 
|  | 1195 | hadError = true; | 
| Eli Friedman | f74a458 | 2011-11-01 02:14:50 +0000 | [diff] [blame] | 1196 | } | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1197 | continue; | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1198 | } | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1199 | // Is this a Universal Character Name escape? | 
|  | 1200 | if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') { | 
|  | 1201 | EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr, | 
|  | 1202 | hadError, FullSourceLoc(StringToks[i].getLocation(),SM), | 
|  | 1203 | CharByteWidth, Diags, Features); | 
|  | 1204 | continue; | 
|  | 1205 | } | 
|  | 1206 | // Otherwise, this is a non-UCN escape character.  Process it. | 
|  | 1207 | unsigned ResultChar = | 
|  | 1208 | ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError, | 
|  | 1209 | FullSourceLoc(StringToks[i].getLocation(), SM), | 
|  | 1210 | CharByteWidth*8, Diags); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1211 |  | 
| Eli Friedman | caf1f26 | 2011-11-02 23:06:23 +0000 | [diff] [blame] | 1212 | if (CharByteWidth == 4) { | 
|  | 1213 | // FIXME: Make the type of the result buffer correct instead of | 
|  | 1214 | // using reinterpret_cast. | 
|  | 1215 | UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultPtr); | 
| Nico Weber | 9b483df | 2011-11-14 05:17:37 +0000 | [diff] [blame] | 1216 | *ResultWidePtr = ResultChar; | 
| Eli Friedman | caf1f26 | 2011-11-02 23:06:23 +0000 | [diff] [blame] | 1217 | ResultPtr += 4; | 
|  | 1218 | } else if (CharByteWidth == 2) { | 
|  | 1219 | // FIXME: Make the type of the result buffer correct instead of | 
|  | 1220 | // using reinterpret_cast. | 
|  | 1221 | UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultPtr); | 
| Nico Weber | 9b483df | 2011-11-14 05:17:37 +0000 | [diff] [blame] | 1222 | *ResultWidePtr = ResultChar & 0xFFFF; | 
| Eli Friedman | caf1f26 | 2011-11-02 23:06:23 +0000 | [diff] [blame] | 1223 | ResultPtr += 2; | 
|  | 1224 | } else { | 
|  | 1225 | assert(CharByteWidth == 1 && "Unexpected char width"); | 
|  | 1226 | *ResultPtr++ = ResultChar & 0xFF; | 
|  | 1227 | } | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1228 | } | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1229 | } | 
|  | 1230 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1231 |  | 
| Chris Lattner | bbee00b | 2009-01-16 18:51:42 +0000 | [diff] [blame] | 1232 | if (Pascal) { | 
| Eli Friedman | 22508f4 | 2011-11-05 00:41:04 +0000 | [diff] [blame] | 1233 | if (CharByteWidth == 4) { | 
|  | 1234 | // FIXME: Make the type of the result buffer correct instead of | 
|  | 1235 | // using reinterpret_cast. | 
|  | 1236 | UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultBuf.data()); | 
|  | 1237 | ResultWidePtr[0] = GetNumStringChars() - 1; | 
|  | 1238 | } else if (CharByteWidth == 2) { | 
|  | 1239 | // FIXME: Make the type of the result buffer correct instead of | 
|  | 1240 | // using reinterpret_cast. | 
|  | 1241 | UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultBuf.data()); | 
|  | 1242 | ResultWidePtr[0] = GetNumStringChars() - 1; | 
|  | 1243 | } else { | 
|  | 1244 | assert(CharByteWidth == 1 && "Unexpected char width"); | 
|  | 1245 | ResultBuf[0] = GetNumStringChars() - 1; | 
|  | 1246 | } | 
| Chris Lattner | bbee00b | 2009-01-16 18:51:42 +0000 | [diff] [blame] | 1247 |  | 
|  | 1248 | // Verify that pascal strings aren't too large. | 
| Chris Lattner | 0833dd0 | 2010-11-17 07:21:13 +0000 | [diff] [blame] | 1249 | if (GetStringLength() > 256) { | 
|  | 1250 | if (Diags) | 
|  | 1251 | Diags->Report(FullSourceLoc(StringToks[0].getLocation(), SM), | 
|  | 1252 | diag::err_pascal_string_too_long) | 
|  | 1253 | << SourceRange(StringToks[0].getLocation(), | 
|  | 1254 | StringToks[NumStringToks-1].getLocation()); | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 1255 | hadError = true; | 
| Eli Friedman | 57d7dde | 2009-04-01 03:17:08 +0000 | [diff] [blame] | 1256 | return; | 
|  | 1257 | } | 
| Chris Lattner | 0833dd0 | 2010-11-17 07:21:13 +0000 | [diff] [blame] | 1258 | } else if (Diags) { | 
| Douglas Gregor | 427c492 | 2010-07-20 14:33:20 +0000 | [diff] [blame] | 1259 | // Complain if this string literal has too many characters. | 
| Chris Lattner | a95880d | 2010-11-17 07:12:42 +0000 | [diff] [blame] | 1260 | unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509; | 
| Douglas Gregor | 427c492 | 2010-07-20 14:33:20 +0000 | [diff] [blame] | 1261 |  | 
|  | 1262 | if (GetNumStringChars() > MaxChars) | 
| Chris Lattner | 0833dd0 | 2010-11-17 07:21:13 +0000 | [diff] [blame] | 1263 | Diags->Report(FullSourceLoc(StringToks[0].getLocation(), SM), | 
|  | 1264 | diag::ext_string_too_long) | 
| Douglas Gregor | 427c492 | 2010-07-20 14:33:20 +0000 | [diff] [blame] | 1265 | << GetNumStringChars() << MaxChars | 
| Chris Lattner | a95880d | 2010-11-17 07:12:42 +0000 | [diff] [blame] | 1266 | << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0) | 
| Douglas Gregor | 427c492 | 2010-07-20 14:33:20 +0000 | [diff] [blame] | 1267 | << SourceRange(StringToks[0].getLocation(), | 
|  | 1268 | StringToks[NumStringToks-1].getLocation()); | 
| Chris Lattner | bbee00b | 2009-01-16 18:51:42 +0000 | [diff] [blame] | 1269 | } | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1270 | } | 
| Chris Lattner | 719e615 | 2009-02-18 19:21:10 +0000 | [diff] [blame] | 1271 |  | 
|  | 1272 |  | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1273 | /// copyStringFragment - This function copies from Start to End into ResultPtr. | 
|  | 1274 | /// Performs widening for multi-byte characters. | 
| Eli Friedman | f74a458 | 2011-11-01 02:14:50 +0000 | [diff] [blame] | 1275 | bool StringLiteralParser::CopyStringFragment(StringRef Fragment) { | 
|  | 1276 | assert(CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4); | 
|  | 1277 | ConversionResult result = conversionOK; | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1278 | // Copy the character span over. | 
|  | 1279 | if (CharByteWidth == 1) { | 
| Eli Friedman | 9135930 | 2012-02-11 05:08:10 +0000 | [diff] [blame] | 1280 | if (!isLegalUTF8Sequence(reinterpret_cast<const UTF8*>(Fragment.begin()), | 
|  | 1281 | reinterpret_cast<const UTF8*>(Fragment.end()))) | 
|  | 1282 | result = sourceIllegal; | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1283 | memcpy(ResultPtr, Fragment.data(), Fragment.size()); | 
|  | 1284 | ResultPtr += Fragment.size(); | 
| Eli Friedman | f74a458 | 2011-11-01 02:14:50 +0000 | [diff] [blame] | 1285 | } else if (CharByteWidth == 2) { | 
|  | 1286 | UTF8 const *sourceStart = (UTF8 const *)Fragment.data(); | 
|  | 1287 | // FIXME: Make the type of the result buffer correct instead of | 
|  | 1288 | // using reinterpret_cast. | 
|  | 1289 | UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr); | 
| Eli Friedman | 9135930 | 2012-02-11 05:08:10 +0000 | [diff] [blame] | 1290 | ConversionFlags flags = strictConversion; | 
| Eli Friedman | f74a458 | 2011-11-01 02:14:50 +0000 | [diff] [blame] | 1291 | result = ConvertUTF8toUTF16( | 
|  | 1292 | &sourceStart,sourceStart + Fragment.size(), | 
|  | 1293 | &targetStart,targetStart + 2*Fragment.size(),flags); | 
|  | 1294 | if (result==conversionOK) | 
|  | 1295 | ResultPtr = reinterpret_cast<char*>(targetStart); | 
|  | 1296 | } else if (CharByteWidth == 4) { | 
|  | 1297 | UTF8 const *sourceStart = (UTF8 const *)Fragment.data(); | 
|  | 1298 | // FIXME: Make the type of the result buffer correct instead of | 
|  | 1299 | // using reinterpret_cast. | 
|  | 1300 | UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr); | 
| Eli Friedman | 9135930 | 2012-02-11 05:08:10 +0000 | [diff] [blame] | 1301 | ConversionFlags flags = strictConversion; | 
| Eli Friedman | f74a458 | 2011-11-01 02:14:50 +0000 | [diff] [blame] | 1302 | result = ConvertUTF8toUTF32( | 
|  | 1303 | &sourceStart,sourceStart + Fragment.size(), | 
|  | 1304 | &targetStart,targetStart + 4*Fragment.size(),flags); | 
|  | 1305 | if (result==conversionOK) | 
|  | 1306 | ResultPtr = reinterpret_cast<char*>(targetStart); | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1307 | } | 
| Eli Friedman | f74a458 | 2011-11-01 02:14:50 +0000 | [diff] [blame] | 1308 | assert((result != targetExhausted) | 
|  | 1309 | && "ConvertUTF8toUTFXX exhausted target buffer"); | 
|  | 1310 | return result != conversionOK; | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1311 | } | 
|  | 1312 |  | 
| Eli Friedman | 9135930 | 2012-02-11 05:08:10 +0000 | [diff] [blame] | 1313 | bool StringLiteralParser::DiagnoseBadString(const Token &Tok) { | 
|  | 1314 | // If we see bad encoding for unprefixed string literals, warn and | 
|  | 1315 | // simply copy the byte values, for compatibility with gcc and older | 
|  | 1316 | // versions of clang. | 
|  | 1317 | bool NoErrorOnBadEncoding = isAscii(); | 
|  | 1318 | unsigned Msg = NoErrorOnBadEncoding ? diag::warn_bad_string_encoding : | 
|  | 1319 | diag::err_bad_string_encoding; | 
|  | 1320 | if (Diags) | 
|  | 1321 | Diags->Report(FullSourceLoc(Tok.getLocation(), SM), Msg); | 
|  | 1322 | return !NoErrorOnBadEncoding; | 
|  | 1323 | } | 
| Craig Topper | 2fa4e86 | 2011-08-11 04:06:15 +0000 | [diff] [blame] | 1324 |  | 
| Chris Lattner | 719e615 | 2009-02-18 19:21:10 +0000 | [diff] [blame] | 1325 | /// getOffsetOfStringByte - This function returns the offset of the | 
|  | 1326 | /// specified byte of the string data represented by Token.  This handles | 
|  | 1327 | /// advancing over escape sequences in the string. | 
|  | 1328 | unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok, | 
| Chris Lattner | 6c66f07 | 2010-11-17 06:46:14 +0000 | [diff] [blame] | 1329 | unsigned ByteNo) const { | 
| Chris Lattner | 719e615 | 2009-02-18 19:21:10 +0000 | [diff] [blame] | 1330 | // Get the spelling of the token. | 
| Dylan Noblesmith | f7ccbad | 2012-02-05 02:13:05 +0000 | [diff] [blame] | 1331 | SmallString<32> SpellingBuffer; | 
| Sean Hunt | 6cf7502 | 2010-08-30 17:47:05 +0000 | [diff] [blame] | 1332 | SpellingBuffer.resize(Tok.getLength()); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1333 |  | 
| Douglas Gregor | 50f6af7 | 2010-03-16 05:20:39 +0000 | [diff] [blame] | 1334 | bool StringInvalid = false; | 
| Chris Lattner | 719e615 | 2009-02-18 19:21:10 +0000 | [diff] [blame] | 1335 | const char *SpellingPtr = &SpellingBuffer[0]; | 
| Chris Lattner | b060727 | 2010-11-17 07:26:20 +0000 | [diff] [blame] | 1336 | unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features, | 
|  | 1337 | &StringInvalid); | 
| Chris Lattner | 91f54ce | 2010-11-17 06:26:08 +0000 | [diff] [blame] | 1338 | if (StringInvalid) | 
| Douglas Gregor | 50f6af7 | 2010-03-16 05:20:39 +0000 | [diff] [blame] | 1339 | return 0; | 
| Chris Lattner | 719e615 | 2009-02-18 19:21:10 +0000 | [diff] [blame] | 1340 |  | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 1341 | assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' && | 
|  | 1342 | SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet"); | 
| Chris Lattner | 719e615 | 2009-02-18 19:21:10 +0000 | [diff] [blame] | 1343 |  | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1344 |  | 
| Chris Lattner | 719e615 | 2009-02-18 19:21:10 +0000 | [diff] [blame] | 1345 | const char *SpellingStart = SpellingPtr; | 
|  | 1346 | const char *SpellingEnd = SpellingPtr+TokLen; | 
|  | 1347 |  | 
|  | 1348 | // Skip over the leading quote. | 
|  | 1349 | assert(SpellingPtr[0] == '"' && "Should be a string literal!"); | 
|  | 1350 | ++SpellingPtr; | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1351 |  | 
| Chris Lattner | 719e615 | 2009-02-18 19:21:10 +0000 | [diff] [blame] | 1352 | // Skip over bytes until we find the offset we're looking for. | 
|  | 1353 | while (ByteNo) { | 
|  | 1354 | assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!"); | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1355 |  | 
| Chris Lattner | 719e615 | 2009-02-18 19:21:10 +0000 | [diff] [blame] | 1356 | // Step over non-escapes simply. | 
|  | 1357 | if (*SpellingPtr != '\\') { | 
|  | 1358 | ++SpellingPtr; | 
|  | 1359 | --ByteNo; | 
|  | 1360 | continue; | 
|  | 1361 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1362 |  | 
| Chris Lattner | 719e615 | 2009-02-18 19:21:10 +0000 | [diff] [blame] | 1363 | // Otherwise, this is an escape character.  Advance over it. | 
|  | 1364 | bool HadError = false; | 
|  | 1365 | ProcessCharEscape(SpellingPtr, SpellingEnd, HadError, | 
| Chris Lattner | ca1475e | 2010-11-17 06:35:43 +0000 | [diff] [blame] | 1366 | FullSourceLoc(Tok.getLocation(), SM), | 
| Douglas Gregor | 5cee119 | 2011-07-27 05:40:30 +0000 | [diff] [blame] | 1367 | CharByteWidth*8, Diags); | 
| Chris Lattner | 719e615 | 2009-02-18 19:21:10 +0000 | [diff] [blame] | 1368 | assert(!HadError && "This method isn't valid on erroneous strings"); | 
|  | 1369 | --ByteNo; | 
|  | 1370 | } | 
| Mike Stump | 1eb4433 | 2009-09-09 15:08:12 +0000 | [diff] [blame] | 1371 |  | 
| Chris Lattner | 719e615 | 2009-02-18 19:21:10 +0000 | [diff] [blame] | 1372 | return SpellingPtr-SpellingStart; | 
|  | 1373 | } |