//===--- Lexer.cpp - C Language Family Lexer ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements the Lexer and Token interfaces.
//
//===----------------------------------------------------------------------===//
//
// TODO: GCC Diagnostics emitted by the lexer:
// PEDWARN: (form feed|vertical tab) in preprocessing directive
//
// Universal characters, unicode, char mapping:
// WARNING: `%.*s' is not in NFKC
// WARNING: `%.*s' is not in NFC
//
// Other:
// TODO: Options to support:
//    -fexec-charset,-fwide-exec-charset
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cctype>
using namespace clang;

static void InitCharacterInfo();

Lexer::Lexer(SourceLocation fileloc, Preprocessor &pp,
             const char *BufStart, const char *BufEnd)
  : FileLoc(fileloc), PP(pp), Features(PP.getLangOptions()) {

  SourceManager &SourceMgr = PP.getSourceManager();
  unsigned InputFileID = SourceMgr.getPhysicalLoc(FileLoc).getFileID();
  const llvm::MemoryBuffer *InputFile = SourceMgr.getBuffer(InputFileID);

  Is_PragmaLexer = false;
  IsMainFile = false;
  InitCharacterInfo();

  // BufferStart must always be InputFile->getBufferStart().
  BufferStart = InputFile->getBufferStart();

  // BufferPtr and BufferEnd can start out somewhere inside the current buffer.
  // If unspecified, they start at the start/end of the buffer.
  BufferPtr = BufStart ? BufStart : BufferStart;
  BufferEnd = BufEnd ? BufEnd : InputFile->getBufferEnd();

  assert(BufferEnd[0] == 0 &&
         "We assume that the input buffer has a null character at the end"
         " to simplify lexing!");

  // Start of the file is a start of line.
  IsAtStartOfLine = true;

  // We are not currently parsing a preprocessor directive.
  ParsingPreprocessorDirective = false;

  // We are not currently parsing an #include filename.
  ParsingFilename = false;

  // We are not in raw mode.  Raw mode disables diagnostics and interpretation
  // of tokens (e.g. identifiers, thus disabling macro expansion).  It is used
  // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block
  // or otherwise skipping over tokens.
  LexingRawMode = false;

  // Default to keeping comments if requested.
  KeepCommentMode = PP.getCommentRetentionState();
}

/// Stringify - Convert the specified string into a C string, with surrounding
/// ""'s, and with escaped \ and " characters.
std::string Lexer::Stringify(const std::string &Str, bool Charify) {
  std::string Result = Str;
  char Quote = Charify ? '\'' : '"';
  for (unsigned i = 0, e = Result.size(); i != e; ++i) {
    if (Result[i] == '\\' || Result[i] == Quote) {
      Result.insert(Result.begin()+i, '\\');
      ++i; ++e;
    }
  }
  return Result;
}


//===----------------------------------------------------------------------===//
// Character information.
//===----------------------------------------------------------------------===//

static unsigned char CharInfo[256];

enum {
  CHAR_HORZ_WS  = 0x01,  // ' ', '\t', '\f', '\v'.  Note, no '\0'
  CHAR_VERT_WS  = 0x02,  // '\r', '\n'
  CHAR_LETTER   = 0x04,  // a-z,A-Z
  CHAR_NUMBER   = 0x08,  // 0-9
  CHAR_UNDER    = 0x10,  // _
  CHAR_PERIOD   = 0x20   // .
};

static void InitCharacterInfo() {
  static bool isInited = false;
  if (isInited) return;
  isInited = true;

  // Initialize the CharInfo table.
  // TODO: statically initialize this.
  CharInfo[(int)' '] = CharInfo[(int)'\t'] =
  CharInfo[(int)'\f'] = CharInfo[(int)'\v'] = CHAR_HORZ_WS;
  CharInfo[(int)'\n'] = CharInfo[(int)'\r'] = CHAR_VERT_WS;

  CharInfo[(int)'_'] = CHAR_UNDER;
  CharInfo[(int)'.'] = CHAR_PERIOD;
  for (unsigned i = 'a'; i <= 'z'; ++i)
    CharInfo[i] = CharInfo[i+'A'-'a'] = CHAR_LETTER;
  for (unsigned i = '0'; i <= '9'; ++i)
    CharInfo[i] = CHAR_NUMBER;
}

/// isIdentifierBody - Return true if this is the body character of an
/// identifier, which is [a-zA-Z0-9_].
static inline bool isIdentifierBody(unsigned char c) {
  return CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER);
}

/// isHorizontalWhitespace - Return true if this character is horizontal
/// whitespace: ' ', '\t', '\f', '\v'.  Note that this returns false for '\0'.
static inline bool isHorizontalWhitespace(unsigned char c) {
  return CharInfo[c] & CHAR_HORZ_WS;
}

/// isWhitespace - Return true if this character is horizontal or vertical
/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.  Note that this returns false
/// for '\0'.
static inline bool isWhitespace(unsigned char c) {
  return CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS);
}

/// isNumberBody - Return true if this is the body character of a
/// preprocessing number, which is [a-zA-Z0-9_.].
static inline bool isNumberBody(unsigned char c) {
  return CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD);
}


//===----------------------------------------------------------------------===//
// Diagnostics forwarding code.
//===----------------------------------------------------------------------===//

/// GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the
/// lexer buffer was all instantiated at a single point, perform the mapping.
/// This is currently only used for _Pragma implementation, so it is the slow
/// path of the hot getSourceLocation method.  Do not allow it to be inlined.
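/// For example, a token lexed from the string inside a _Pragma gets a location
/// whose physical component is the character offset within that string buffer,
/// while its logical component is the location of the _Pragma itself.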
static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
                                        SourceLocation FileLoc,
                                        unsigned CharNo) DISABLE_INLINE;
static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
                                        SourceLocation FileLoc,
                                        unsigned CharNo) {
  // Otherwise, we're lexing "mapped tokens".  This is used for things like
  // _Pragma handling.  Combine the instantiation location of FileLoc with the
  // physical location.
  SourceManager &SourceMgr = PP.getSourceManager();

  // Create a new SLoc which is expanded from logical(FileLoc) but whose
  // characters come from phys(FileLoc)+Offset.
  SourceLocation VirtLoc = SourceMgr.getLogicalLoc(FileLoc);
  SourceLocation PhysLoc = SourceMgr.getPhysicalLoc(FileLoc);
  PhysLoc = SourceLocation::getFileLoc(PhysLoc.getFileID(), CharNo);
  return SourceMgr.getInstantiationLoc(PhysLoc, VirtLoc);
}

/// getSourceLocation - Return a source location identifier for the specified
/// offset in the current file.
SourceLocation Lexer::getSourceLocation(const char *Loc) const {
  assert(Loc >= BufferStart && Loc <= BufferEnd &&
         "Location out of range for this buffer!");

  // In the normal case, we're just lexing from a simple file buffer, return
  // the file id from FileLoc with the offset specified.
  unsigned CharNo = Loc-BufferStart;
  if (FileLoc.isFileID())
    return SourceLocation::getFileLoc(FileLoc.getFileID(), CharNo);

  return GetMappedTokenLoc(PP, FileLoc, CharNo);
}

/// Diag - Forwarding function for diagnostics.  This translates a source
/// position in the current buffer into a SourceLocation object for rendering.
void Lexer::Diag(const char *Loc, unsigned DiagID,
                 const std::string &Msg) const {
  if (LexingRawMode && Diagnostic::isNoteWarningOrExtension(DiagID))
    return;
  PP.Diag(getSourceLocation(Loc), DiagID, Msg);
}
void Lexer::Diag(SourceLocation Loc, unsigned DiagID,
                 const std::string &Msg) const {
  if (LexingRawMode && Diagnostic::isNoteWarningOrExtension(DiagID))
    return;
  PP.Diag(Loc, DiagID, Msg);
}


//===----------------------------------------------------------------------===//
// Trigraph and Escaped Newline Handling Code.
//===----------------------------------------------------------------------===//

/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
static char GetTrigraphCharForLetter(char Letter) {
  switch (Letter) {
  default:   return 0;
  case '=':  return '#';
  case ')':  return ']';
  case '(':  return '[';
  case '!':  return '|';
  case '\'': return '^';
  case '>':  return '}';
  case '/':  return '\\';
  case '<':  return '{';
  case '-':  return '~';
  }
}

/// DecodeTrigraphChar - If the specified character is a legal trigraph when
/// prefixed with ??, emit a warning about the trigraph whether trigraphs are
/// enabled or not.  If trigraphs are enabled, return the decoded character;
/// otherwise return '\0'.
static char DecodeTrigraphChar(const char *CP, Lexer *L) {
  char Res = GetTrigraphCharForLetter(*CP);
  if (Res && L) {
    if (!L->getFeatures().Trigraphs) {
      L->Diag(CP-2, diag::trigraph_ignored);
      return 0;
    } else {
      L->Diag(CP-2, diag::trigraph_converted, std::string()+Res);
    }
  }
  return Res;
}

/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
/// get its size, and return it.  This is tricky in several cases:
///   1. If currently at the start of a trigraph, we warn about the trigraph,
///      then either return the trigraph (skipping 3 chars) or the '?',
///      depending on whether trigraphs are enabled or not.
///   2. If this is an escaped newline (potentially with whitespace between
///      the backslash and newline), implicitly skip the newline and return
///      the char after it.
///   3. If this is a UCN, return it.  FIXME: C++ UCN's?
///
/// This handles the slow/uncommon case of the getCharAndSize method.  Here we
/// know that we can accumulate into Size, and that we have already incremented
/// Ptr by Size bytes.
///
/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
/// be updated to match.
///
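/// For example, when Ptr points at a backslash, a newline, and then 'x', this
/// returns 'x' and accumulates all three characters into Size.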
char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
                               Token *Tok) {
  // If we have a slash, look for an escaped newline.
  if (Ptr[0] == '\\') {
    ++Size;
    ++Ptr;
Slash:
    // Common case, backslash-char where the char is not whitespace.
    if (!isWhitespace(Ptr[0])) return '\\';

    // See if we have optional whitespace characters followed by a newline.
    {
      unsigned SizeTmp = 0;
      do {
        ++SizeTmp;
        if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') {
          // Remember that this token needs to be cleaned.
          if (Tok) Tok->setFlag(Token::NeedsCleaning);

          // Warn if there was whitespace between the backslash and newline.
          if (SizeTmp != 1 && Tok)
            Diag(Ptr, diag::backslash_newline_space);

          // If this is a \r\n or \n\r, skip the newlines.
          if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') &&
              Ptr[SizeTmp-1] != Ptr[SizeTmp])
            ++SizeTmp;

          // Found backslash<whitespace><newline>.  Parse the char after it.
          Size += SizeTmp;
          Ptr  += SizeTmp;
          // Use slow version to accumulate a correct size field.
          return getCharAndSizeSlow(Ptr, Size, Tok);
        }
      } while (isWhitespace(Ptr[SizeTmp]));
    }

    // Otherwise, this is not an escaped newline, just return the slash.
    return '\\';
  }

  // If this is a trigraph, process it.
  if (Ptr[0] == '?' && Ptr[1] == '?') {
    // If this is actually a legal trigraph (not something like "??x"), emit
    // a trigraph warning.  If so, and if trigraphs are enabled, return it.
    if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : 0)) {
      // Remember that this token needs to be cleaned.
      if (Tok) Tok->setFlag(Token::NeedsCleaning);

      Ptr += 3;
      Size += 3;
      if (C == '\\') goto Slash;
      return C;
    }
  }

  // If this is neither, return a single character.
  ++Size;
  return *Ptr;
}


/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
/// getCharAndSizeNoWarn method.  Here we know that we can accumulate into Size,
/// and that we have already incremented Ptr by Size bytes.
///
/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
/// be updated to match.
char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
                                     const LangOptions &Features) {
  // If we have a slash, look for an escaped newline.
  if (Ptr[0] == '\\') {
    ++Size;
    ++Ptr;
Slash:
    // Common case, backslash-char where the char is not whitespace.
    if (!isWhitespace(Ptr[0])) return '\\';

    // See if we have optional whitespace characters followed by a newline.
    {
      unsigned SizeTmp = 0;
      do {
        ++SizeTmp;
        if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') {

          // If this is a \r\n or \n\r, skip the newlines.
          if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') &&
              Ptr[SizeTmp-1] != Ptr[SizeTmp])
            ++SizeTmp;

          // Found backslash<whitespace><newline>.  Parse the char after it.
          Size += SizeTmp;
          Ptr  += SizeTmp;

          // Use slow version to accumulate a correct size field.
          return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
        }
      } while (isWhitespace(Ptr[SizeTmp]));
    }

    // Otherwise, this is not an escaped newline, just return the slash.
    return '\\';
  }

  // If this is a trigraph, process it.
  if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
    // If this is actually a legal trigraph (not something like "??x"), return
    // it.
    if (char C = GetTrigraphCharForLetter(Ptr[2])) {
      Ptr += 3;
      Size += 3;
      if (C == '\\') goto Slash;
      return C;
    }
  }

  // If this is neither, return a single character.
  ++Size;
  return *Ptr;
}

//===----------------------------------------------------------------------===//
// Helper methods for lexing.
//===----------------------------------------------------------------------===//

void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
  // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
  unsigned Size;
  unsigned char C = *CurPtr++;
  while (isIdentifierBody(C)) {
    C = *CurPtr++;
  }
  --CurPtr;   // Back up over the skipped character.

  // Fast path, no $,\,? in identifier found.  '\' might be an escaped newline
  // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
  // FIXME: UCNs.
  if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
FinishIdentifier:
    const char *IdStart = BufferPtr;
    FormTokenWithChars(Result, CurPtr);
    Result.setKind(tok::identifier);

    // If we are in raw mode, return this identifier raw.  There is no need to
    // look up identifier information or attempt to macro expand it.
    if (LexingRawMode) return;

    // Fill in Result.IdentifierInfo, looking up the identifier in the
    // identifier table.
    PP.LookUpIdentifierInfo(Result, IdStart);

    // Finally, now that we know we have an identifier, pass this off to the
    // preprocessor, which may macro expand it or something.
    return PP.HandleIdentifier(Result);
  }

  // Otherwise, $,\,? in identifier found.  Enter slower path.

  C = getCharAndSize(CurPtr, Size);
  while (1) {
    if (C == '$') {
      // If we hit a $ and they are not supported in identifiers, we are done.
      if (!Features.DollarIdents) goto FinishIdentifier;

      // Otherwise, emit a diagnostic and continue.
      Diag(CurPtr, diag::ext_dollar_in_identifier);
      CurPtr = ConsumeChar(CurPtr, Size, Result);
      C = getCharAndSize(CurPtr, Size);
      continue;
    } else if (!isIdentifierBody(C)) { // FIXME: UCNs.
      // Found end of identifier.
      goto FinishIdentifier;
    }

    // Otherwise, this character is good, consume it.
    CurPtr = ConsumeChar(CurPtr, Size, Result);

    C = getCharAndSize(CurPtr, Size);
    while (isIdentifierBody(C)) { // FIXME: UCNs.
      CurPtr = ConsumeChar(CurPtr, Size, Result);
      C = getCharAndSize(CurPtr, Size);
    }
  }
}


/// LexNumericConstant - Lex the remainder of an integer or floating point
/// constant.  CurPtr[-1] is the first character lexed.  Form the
/// numeric_constant token and update BufferPtr to point past its end.
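/// For example, in "1e+12" the '+' is consumed here because it follows an 'e',
/// so the whole literal forms a single numeric constant token.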
void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
  unsigned Size;
  char C = getCharAndSize(CurPtr, Size);
  char PrevCh = 0;
  while (isNumberBody(C)) { // FIXME: UCNs?
    CurPtr = ConsumeChar(CurPtr, Size, Result);
    PrevCh = C;
    C = getCharAndSize(CurPtr, Size);
  }

  // If we fell out, check for a sign, due to 1e+12.  If we have one, continue.
  if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e'))
    return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));

  // If we have a hex FP constant, continue.
  if (Features.HexFloats &&
      (C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p'))
    return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));

  Result.setKind(tok::numeric_constant);

  // Update the location of token as well as BufferPtr.
  FormTokenWithChars(Result, CurPtr);
}

/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
/// either " or L".
void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide){
  const char *NulCharacter = 0; // Does this string contain the \0 character?

  char C = getAndAdvanceChar(CurPtr, Result);
  while (C != '"') {
    // Skip escaped characters.
    if (C == '\\') {
      // Skip the escaped character.
      C = getAndAdvanceChar(CurPtr, Result);
    } else if (C == '\n' || C == '\r' ||             // Newline.
               (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
      if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_string);
      Result.setKind(tok::unknown);
      FormTokenWithChars(Result, CurPtr-1);
      return;
    } else if (C == 0) {
      NulCharacter = CurPtr-1;
    }
    C = getAndAdvanceChar(CurPtr, Result);
  }

  // If a nul character existed in the string, warn about it.
  if (NulCharacter) Diag(NulCharacter, diag::null_in_string);

  Result.setKind(Wide ? tok::wide_string_literal : tok::string_literal);

  // Update the location of the token as well as the BufferPtr instance var.
  FormTokenWithChars(Result, CurPtr);
}

/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
/// after having lexed the '<' character.  This is used for #include filenames.
void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
  const char *NulCharacter = 0; // Does this string contain the \0 character?

  char C = getAndAdvanceChar(CurPtr, Result);
  while (C != '>') {
    // Skip escaped characters.
    if (C == '\\') {
      // Skip the escaped character.
      C = getAndAdvanceChar(CurPtr, Result);
    } else if (C == '\n' || C == '\r' ||             // Newline.
               (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
      if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_string);
      Result.setKind(tok::unknown);
      FormTokenWithChars(Result, CurPtr-1);
      return;
    } else if (C == 0) {
      NulCharacter = CurPtr-1;
    }
    C = getAndAdvanceChar(CurPtr, Result);
  }

  // If a nul character existed in the string, warn about it.
  if (NulCharacter) Diag(NulCharacter, diag::null_in_string);

  Result.setKind(tok::angle_string_literal);

  // Update the location of token as well as BufferPtr.
  FormTokenWithChars(Result, CurPtr);
}


/// LexCharConstant - Lex the remainder of a character constant, after having
/// lexed either ' or L'.
void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
  const char *NulCharacter = 0; // Does this character contain the \0 character?

  // Handle the common case of 'x' and '\y' efficiently.
  char C = getAndAdvanceChar(CurPtr, Result);
  if (C == '\'') {
    if (!LexingRawMode) Diag(BufferPtr, diag::err_empty_character);
    Result.setKind(tok::unknown);
    FormTokenWithChars(Result, CurPtr);
    return;
  } else if (C == '\\') {
    // Skip the escaped character.
    // FIXME: UCN's.
    C = getAndAdvanceChar(CurPtr, Result);
  }

  if (C && C != '\n' && C != '\r' && CurPtr[0] == '\'') {
    ++CurPtr;
  } else {
    // Fall back on generic code for embedded nulls, newlines, wide chars.
    do {
      // Skip escaped characters.
      if (C == '\\') {
        // Skip the escaped character.
        C = getAndAdvanceChar(CurPtr, Result);
      } else if (C == '\n' || C == '\r' ||               // Newline.
                 (C == 0 && CurPtr-1 == BufferEnd)) {    // End of file.
        if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_char);
        Result.setKind(tok::unknown);
        FormTokenWithChars(Result, CurPtr-1);
        return;
      } else if (C == 0) {
        NulCharacter = CurPtr-1;
      }
      C = getAndAdvanceChar(CurPtr, Result);
    } while (C != '\'');
  }

  if (NulCharacter) Diag(NulCharacter, diag::null_in_char);

  Result.setKind(tok::char_constant);

  // Update the location of token as well as BufferPtr.
  FormTokenWithChars(Result, CurPtr);
}

/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
/// Update BufferPtr to point to the next non-whitespace character and return.
void Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
  // Whitespace - Skip it, then return the token after the whitespace.
  unsigned char Char = *CurPtr;  // Skip consecutive spaces efficiently.
  while (1) {
    // Skip horizontal whitespace very aggressively.
    while (isHorizontalWhitespace(Char))
      Char = *++CurPtr;

    // Otherwise if we have something other than whitespace, we're done.
    if (Char != '\n' && Char != '\r')
      break;

    if (ParsingPreprocessorDirective) {
      // End of preprocessor directive line, let LexTokenInternal handle this.
      BufferPtr = CurPtr;
      return;
    }

    // ok, but handle newline.
    // The returned token is at the start of the line.
    Result.setFlag(Token::StartOfLine);
    // No leading whitespace seen so far.
    Result.clearFlag(Token::LeadingSpace);
    Char = *++CurPtr;
  }

  // If this isn't immediately after a newline, there is leading space.
  char PrevChar = CurPtr[-1];
  if (PrevChar != '\n' && PrevChar != '\r')
    Result.setFlag(Token::LeadingSpace);

  BufferPtr = CurPtr;
}

/// SkipBCPLComment - We have just read the // characters from input.  Skip
/// until we find the newline character that terminates the comment.  Then
/// update BufferPtr and return.
bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
  // If BCPL comments aren't explicitly enabled for this language, emit an
  // extension warning.
  if (!Features.BCPLComment) {
    Diag(BufferPtr, diag::ext_bcpl_comment);

    // Mark them enabled so we only emit one warning for this translation
    // unit.
    Features.BCPLComment = true;
  }

  // Scan over the body of the comment.  The common case, when scanning, is that
  // the comment contains normal ascii characters with nothing interesting in
  // them.  As such, optimize for this case with the inner loop.
  char C;
  do {
    C = *CurPtr;
    // FIXME: Speedup BCPL comment lexing.  Just scan for a \n or \r character.
    // If we find a \n character, scan backwards, checking to see if it's an
    // escaped newline, like we do for block comments.

    // Skip over characters in the fast loop.
    while (C != 0 &&                // Potentially EOF.
           C != '\\' &&             // Potentially escaped newline.
           C != '?' &&              // Potentially trigraph.
           C != '\n' && C != '\r')  // Newline or DOS-style newline.
      C = *++CurPtr;

    // If this is a newline, we're done.
    if (C == '\n' || C == '\r')
      break;  // Found the newline? Break out!

    // Otherwise, this is a hard case.  Fall back on getAndAdvanceChar to
    // properly decode the character.
    const char *OldPtr = CurPtr;
    C = getAndAdvanceChar(CurPtr, Result);

    // If we read multiple characters, and one of those characters was a \r or
    // \n, then we had an escaped newline within the comment.  Emit diagnostic
    // unless the next line is also a // comment.
    if (CurPtr != OldPtr+1 && C != '/' && CurPtr[0] != '/') {
      for (; OldPtr != CurPtr; ++OldPtr)
        if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {
          // Okay, we found a // comment that ends in a newline.  If the next
          // line is also a // comment preceded only by spaces, don't diagnose.
          if (isspace(C)) {
            const char *ForwardPtr = CurPtr;
            while (isspace(*ForwardPtr))  // Skip whitespace.
              ++ForwardPtr;
            if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
              break;
          }

          Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment);
          break;
        }
    }

    if (CurPtr == BufferEnd+1) { --CurPtr; break; }
  } while (C != '\n' && C != '\r');

  // Found but did not consume the newline.

  // If we are returning comments as tokens, return this comment as a token.
  if (KeepCommentMode)
    return SaveBCPLComment(Result, CurPtr);

  // If we are inside a preprocessor directive and we see the end of line,
  // return immediately, so that the lexer can return this as an EOM token.
  if (ParsingPreprocessorDirective || CurPtr == BufferEnd) {
    BufferPtr = CurPtr;
    return true;
  }

  // Otherwise, eat the \n character.  We don't care if this is a \n\r or
  // \r\n sequence.
  ++CurPtr;

  // The next returned token is at the start of the line.
  Result.setFlag(Token::StartOfLine);
  // No leading whitespace seen so far.
  Result.clearFlag(Token::LeadingSpace);
  BufferPtr = CurPtr;
  return true;
}

/// SaveBCPLComment - If in save-comment mode, package up this BCPL comment in
/// an appropriate way and return it.
bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
  Result.setKind(tok::comment);
  FormTokenWithChars(Result, CurPtr);

  // If this BCPL-style comment is in a macro definition, transmogrify it into
  // a C-style block comment.
  if (ParsingPreprocessorDirective) {
    std::string Spelling = PP.getSpelling(Result);
    assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?");
    Spelling[1] = '*';   // Change prefix to "/*".
    Spelling += "*/";    // add suffix.

    Result.setLocation(PP.CreateString(&Spelling[0], Spelling.size(),
                                       Result.getLocation()));
    Result.setLength(Spelling.size());
  }
  return false;
}

/// isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline
/// character (either \n or \r) is part of an escaped newline sequence.  Issue a
/// diagnostic if so.  We know that the newline is inside of a block comment.
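/// For example, a comment ending in a '*', a backslash-newline, and then a '/'
/// is still treated as terminated, and the same applies when the backslash is
/// written as the ??/ trigraph.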
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
                                                  Lexer *L) {
  assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');

  // Back up off the newline.
  --CurPtr;

  // If this is a two-character newline sequence, skip the other character.
  if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
    // \n\n or \r\r -> not escaped newline.
    if (CurPtr[0] == CurPtr[1])
      return false;
    // \n\r or \r\n -> skip the newline.
    --CurPtr;
  }

  // If we have horizontal whitespace, skip over it.  We allow whitespace
  // between the slash and newline.
  bool HasSpace = false;
  while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
    --CurPtr;
    HasSpace = true;
  }

  // If we have a slash, we know this is an escaped newline.
  if (*CurPtr == '\\') {
    if (CurPtr[-1] != '*') return false;
  } else {
    // It isn't a slash, is it the ??/ trigraph?
    if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
        CurPtr[-3] != '*')
      return false;

    // This is the trigraph ending the comment.  Emit a stern warning!
    CurPtr -= 2;

    // If no trigraphs are enabled, warn that we ignored this trigraph and
    // ignore this * character.
    if (!L->getFeatures().Trigraphs) {
      L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
      return false;
    }
    L->Diag(CurPtr, diag::trigraph_ends_block_comment);
  }

  // Warn about having an escaped newline between the */ characters.
  L->Diag(CurPtr, diag::escaped_newline_block_comment_end);

  // If there was space between the backslash and newline, warn about it.
  if (HasSpace) L->Diag(CurPtr, diag::backslash_newline_space);

  return true;
}

#ifdef __SSE2__
#include <emmintrin.h>
#elif __ALTIVEC__
#include <altivec.h>
#undef bool
#endif

/// SkipBlockComment - We have just read the /* characters from input.  Read
/// until we find the */ characters that terminate the comment.  Note that we
/// don't bother decoding trigraphs or escaped newlines in block comments,
/// because they cannot cause the comment to end.  The only thing that can
/// happen is the comment could end with an escaped newline between the '*' and
/// the '/' that end the comment.
bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
  // Scan one character past where we should, looking for a '/' character.  Once
  // we find it, check to see if it was preceded by a *.  This common
  // optimization helps people who like to put a lot of * characters in their
  // comments.

  // Get the first character with newlines and trigraphs skipped, to handle
  // the degenerate /*/ case below correctly if the * has an escaped newline
  // after it.
  unsigned CharSize;
  unsigned char C = getCharAndSize(CurPtr, CharSize);
  CurPtr += CharSize;
  if (C == 0 && CurPtr == BufferEnd+1) {
    Diag(BufferPtr, diag::err_unterminated_block_comment);
    BufferPtr = CurPtr-1;
    return true;
  }

  // Check to see if the first character after the '/*' is another /.  If so,
  // then this slash does not end the block comment, it is part of it.
  if (C == '/')
    C = *CurPtr++;

  while (1) {
    // Skip over all non-interesting characters until we find end of buffer or a
    // (probably ending) '/' character.
    if (CurPtr + 24 < BufferEnd) {
      // While not aligned to a 16-byte boundary.
      while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0)
        C = *CurPtr++;

      if (C == '/') goto FoundSlash;

#ifdef __SSE2__
      __m128i Slashes = _mm_set_epi8('/', '/', '/', '/', '/', '/', '/', '/',
                                     '/', '/', '/', '/', '/', '/', '/', '/');
      while (CurPtr+16 <= BufferEnd &&
             _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes)) == 0)
        CurPtr += 16;
#elif __ALTIVEC__
      __vector unsigned char Slashes = {
        '/', '/', '/', '/',  '/', '/', '/', '/',
        '/', '/', '/', '/',  '/', '/', '/', '/'
      };
      while (CurPtr+16 <= BufferEnd &&
             !vec_any_eq(*(vector unsigned char*)CurPtr, Slashes))
        CurPtr += 16;
#else
      // Scan for '/' quickly.  Many block comments are very large.
      while (CurPtr[0] != '/' &&
             CurPtr[1] != '/' &&
             CurPtr[2] != '/' &&
             CurPtr[3] != '/' &&
             CurPtr+4 < BufferEnd) {
        CurPtr += 4;
      }
#endif

      // It has to be one of the bytes scanned, increment to it and read one.
      C = *CurPtr++;
    }

    // Loop to scan the remainder.
    while (C != '/' && C != '\0')
      C = *CurPtr++;

  FoundSlash:
    if (C == '/') {
      if (CurPtr[-2] == '*')  // We found the final */.  We're done!
        break;

      if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) {
        if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) {
          // We found the final */, though it had an escaped newline between the
          // * and /.  We're done!
          break;
        }
      }
      if (CurPtr[0] == '*' && CurPtr[1] != '/') {
        // If this is a /* inside of the comment, emit a warning.  Don't do this
        // if this is a /*/, which will end the comment.  This misses cases with
        // embedded escaped newlines, but oh well.
        Diag(CurPtr-1, diag::nested_block_comment);
      }
    } else if (C == 0 && CurPtr == BufferEnd+1) {
      Diag(BufferPtr, diag::err_unterminated_block_comment);
      // Note: the user probably forgot a */.  We could continue immediately
      // after the /*, but this would involve lexing a lot of what really is the
      // comment, which surely would confuse the parser.
      BufferPtr = CurPtr-1;
      return true;
    }
    C = *CurPtr++;
  }

  // If we are returning comments as tokens, return this comment as a token.
  if (KeepCommentMode) {
    Result.setKind(tok::comment);
    FormTokenWithChars(Result, CurPtr);
    return false;
  }

  // It is common for the tokens immediately after a /**/ comment to be
  // whitespace.  Instead of going through the big switch, handle it
  // efficiently now.
  if (isHorizontalWhitespace(*CurPtr)) {
    Result.setFlag(Token::LeadingSpace);
    SkipWhitespace(Result, CurPtr+1);
    return true;
  }

  // Otherwise, just return so that the next character will be lexed as a token.
  BufferPtr = CurPtr;
  Result.setFlag(Token::LeadingSpace);
  return true;
}

//===----------------------------------------------------------------------===//
// Primary Lexing Entry Points
//===----------------------------------------------------------------------===//

/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and
/// (potentially) macro expand the filename.
void Lexer::LexIncludeFilename(Token &FilenameTok) {
  assert(ParsingPreprocessorDirective &&
         ParsingFilename == false &&
         "Must be in a preprocessing directive!");

  // We are now parsing a filename!
  ParsingFilename = true;

  // Lex the filename.
  Lex(FilenameTok);

  // We should have obtained the filename now.
  ParsingFilename = false;

  // No filename?
  if (FilenameTok.getKind() == tok::eom)
    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
}

/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
/// uninterpreted string.  This switches the lexer out of directive mode.
std::string Lexer::ReadToEndOfLine() {
  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
         "Must be in a preprocessing directive!");
  std::string Result;
  Token Tmp;

  // CurPtr - Cache BufferPtr in an automatic variable.
  const char *CurPtr = BufferPtr;
  while (1) {
    char Char = getAndAdvanceChar(CurPtr, Tmp);
    switch (Char) {
    default:
      Result += Char;
      break;
    case 0:  // Null.
      // Found end of file?
      if (CurPtr-1 != BufferEnd) {
        // Nope, normal character, continue.
        Result += Char;
        break;
      }
      // FALL THROUGH.
    case '\r':
    case '\n':
      // Okay, we found the end of the line. First, back up past the \0, \r, \n.
      assert(CurPtr[-1] == Char && "Trigraphs for newline?");
      BufferPtr = CurPtr-1;

      // Next, lex the character, which should handle the EOM transition.
      Lex(Tmp);
      assert(Tmp.getKind() == tok::eom && "Unexpected token!");

      // Finally, we're done, return the string we found.
      return Result;
    }
  }
}

/// LexEndOfFile - CurPtr points to the end of this file.  Handle this
/// condition, reporting diagnostics and handling other edge cases as required.
/// This returns true if Result contains a token, false if PP.Lex should be
/// called again.
bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
  // If we hit the end of the file while parsing a preprocessor directive,
  // end the preprocessor directive first.  The next token returned will
  // then be the end of file.
  if (ParsingPreprocessorDirective) {
    // Done parsing the "line".
    ParsingPreprocessorDirective = false;
    Result.setKind(tok::eom);
    // Update the location of token as well as BufferPtr.
    FormTokenWithChars(Result, CurPtr);

    // Restore comment saving mode, in case it was disabled for directive.
    KeepCommentMode = PP.getCommentRetentionState();
    return true;  // Have a token.
  }

  // If we are in raw mode, return this event as an EOF token.  Let the caller
  // that put us in raw mode handle the event.
  if (LexingRawMode) {
    Result.startToken();
    BufferPtr = BufferEnd;
    FormTokenWithChars(Result, BufferEnd);
    Result.setKind(tok::eof);
    return true;
  }

  // Otherwise, issue diagnostics for unterminated #if and missing newline.

  // If we are in a #if directive, emit an error.
  while (!ConditionalStack.empty()) {
    Diag(ConditionalStack.back().IfLoc, diag::err_pp_unterminated_conditional);
    ConditionalStack.pop_back();
  }

  // If the file was empty or didn't end in a newline, issue a pedwarn.
  if (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
    Diag(BufferEnd, diag::ext_no_newline_eof);

  BufferPtr = CurPtr;

  // Finally, let the preprocessor handle this.
  return PP.HandleEndOfFile(Result);
}

/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
/// the specified lexer will return a tok::l_paren token, 0 if it is something
/// else and 2 if there are no more tokens in the buffer controlled by the
/// lexer.
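/// This is used, for example, to decide whether the name of a function-like
/// macro is actually being invoked (i.e. is followed by a '(') without
/// expanding any tokens.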
unsigned Lexer::isNextPPTokenLParen() {
  assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");

  // Switch to 'skipping' mode.  This will ensure that we can lex a token
  // without emitting diagnostics, disables macro expansion, and will cause EOF
  // to return an EOF token instead of popping the include stack.
  LexingRawMode = true;

  // Save state that can be changed while lexing so that we can restore it.
  const char *TmpBufferPtr = BufferPtr;

  Token Tok;
  Tok.startToken();
  LexTokenInternal(Tok);

  // Restore state that may have changed.
  BufferPtr = TmpBufferPtr;

  // Restore the lexer back to non-skipping mode.
  LexingRawMode = false;

  if (Tok.getKind() == tok::eof)
    return 2;
  return Tok.getKind() == tok::l_paren;
}


/// LexTokenInternal - This implements a simple C family lexer.  It is an
/// extremely performance critical piece of code.  This assumes that the buffer
/// has a null character at the end of the file.  It returns a preprocessing
/// token, not a normal token; as such, it is an internal interface.  It assumes
/// that the Flags of Result have been cleared before calling this.
| Chris Lattner | d217773 | 2007-07-20 16:59:19 +0000 | [diff] [blame] | 1080 | void Lexer::LexTokenInternal(Token &Result) { | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1081 | LexNextToken: | 
 | 1082 |   // New token, can't need cleaning yet. | 
| Chris Lattner | d217773 | 2007-07-20 16:59:19 +0000 | [diff] [blame] | 1083 |   Result.clearFlag(Token::NeedsCleaning); | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1084 |   Result.setIdentifierInfo(0); | 
 | 1085 |    | 
 | 1086 |   // CurPtr - Cache BufferPtr in an automatic variable. | 
 | 1087 |   const char *CurPtr = BufferPtr; | 
 | 1088 |  | 
 | 1089 |   // Small amounts of horizontal whitespace is very common between tokens. | 
 | 1090 |   if ((*CurPtr == ' ') || (*CurPtr == '\t')) { | 
 | 1091 |     ++CurPtr; | 
 | 1092 |     while ((*CurPtr == ' ') || (*CurPtr == '\t')) | 
 | 1093 |       ++CurPtr; | 
 | 1094 |     BufferPtr = CurPtr; | 
| Chris Lattner | d217773 | 2007-07-20 16:59:19 +0000 | [diff] [blame] | 1095 |     Result.setFlag(Token::LeadingSpace); | 
| Reid Spencer | 5f016e2 | 2007-07-11 17:01:13 +0000 | [diff] [blame] | 1096 |   } | 
 | 1097 |    | 
 | 1098 |   unsigned SizeTmp, SizeTmp2;   // Temporaries for use in cases below. | 
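  // Note on SizeTmp/SizeTmp2: getCharAndSize and getAndAdvanceChar see through
  // trigraphs and backslash-newline splices, so a single logical character may
  // occupy more than one byte in the buffer.  For example the trigraph "??="
  // is one logical '#', and an identifier split by a backslash-newline ("a",
  // backslash, newline, "b") lexes as the single identifier "ab".  The Size
  // out-parameters report how many bytes the logical character consumed, and
  // ConsumeChar advances past exactly that many.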

  // Read a character, advancing over it.
  char Char = getAndAdvanceChar(CurPtr, Result);
  switch (Char) {
  case 0:  // Null.
    // Found end of file?
    if (CurPtr-1 == BufferEnd) {
      // Read the PP instance variable into an automatic variable, because
      // LexEndOfFile will often delete 'this'.
      Preprocessor &PPCache = PP;
      if (LexEndOfFile(Result, CurPtr-1))  // Retreat back into the file.
        return;   // Got a token to return.
      return PPCache.Lex(Result);
    }

    Diag(CurPtr-1, diag::null_in_file);
    Result.setFlag(Token::LeadingSpace);
    SkipWhitespace(Result, CurPtr);
    goto LexNextToken;   // GCC isn't tail call eliminating.
  case '\n':
  case '\r':
    // If we are inside a preprocessor directive and we see the end of line,
    // we know we are done with the directive, so return an EOM token.
    if (ParsingPreprocessorDirective) {
      // Done parsing the "line".
      ParsingPreprocessorDirective = false;

      // Restore comment saving mode, in case it was disabled for directive.
      KeepCommentMode = PP.getCommentRetentionState();

      // Since we consumed a newline, we are back at the start of a line.
      IsAtStartOfLine = true;

      Result.setKind(tok::eom);
      break;
    }
    // The returned token is at the start of the line.
    Result.setFlag(Token::StartOfLine);
    // No leading whitespace seen so far.
    Result.clearFlag(Token::LeadingSpace);
    SkipWhitespace(Result, CurPtr);
    goto LexNextToken;   // GCC isn't tail call eliminating.
  case ' ':
  case '\t':
  case '\f':
  case '\v':
  SkipHorizontalWhitespace:
    Result.setFlag(Token::LeadingSpace);
    SkipWhitespace(Result, CurPtr);

  SkipIgnoredUnits:
    CurPtr = BufferPtr;

    // If the next token is obviously a // or /* */ comment, skip it efficiently
    // too (without going through the big switch stmt).
    if (CurPtr[0] == '/' && CurPtr[1] == '/' && !KeepCommentMode) {
      SkipBCPLComment(Result, CurPtr+2);
      goto SkipIgnoredUnits;
    } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !KeepCommentMode) {
      SkipBlockComment(Result, CurPtr+2);
      goto SkipIgnoredUnits;
    } else if (isHorizontalWhitespace(*CurPtr)) {
      goto SkipHorizontalWhitespace;
    }
    goto LexNextToken;   // GCC isn't tail call eliminating.

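  // The 'L' case below handles wide literals such as L"text" and L'x'; if no
  // quote follows, 'L' is just the first character of an ordinary identifier
  // (e.g. LexNextToken) and falls through to the identifier code.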
  case 'L':
    // Notify MIOpt that we read a non-whitespace/non-comment token.
    MIOpt.ReadToken();
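    // (MIOpt is the multiple-include optimization: it watches for the
    // "#ifndef GUARD / #define GUARD ... #endif" pattern so that a guarded
    // header can be skipped when it is #included again.  Seeing any real
    // token outside that pattern disables the optimization, which is what
    // ReadToken records.)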
    Char = getCharAndSize(CurPtr, SizeTmp);

    // Wide string literal.
    if (Char == '"')
      return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
                              true);

    // Wide character constant.
    if (Char == '\'')
      return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
    // FALL THROUGH, treating L like the start of an identifier.

  // C99 6.4.2: Identifiers.
  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
  case 'H': case 'I': case 'J': case 'K':    /*'L'*/case 'M': case 'N':
  case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
  case 'V': case 'W': case 'X': case 'Y': case 'Z':
  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
  case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
  case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
  case 'v': case 'w': case 'x': case 'y': case 'z':
  case '_':
    // Notify MIOpt that we read a non-whitespace/non-comment token.
    MIOpt.ReadToken();
    return LexIdentifier(Result, CurPtr);

  // C99 6.4.4.1: Integer Constants.
  // C99 6.4.4.2: Floating Constants.
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    // Notify MIOpt that we read a non-whitespace/non-comment token.
    MIOpt.ReadToken();
    return LexNumericConstant(Result, CurPtr);

  // C99 6.4.4: Character Constants.
  case '\'':
    // Notify MIOpt that we read a non-whitespace/non-comment token.
    MIOpt.ReadToken();
    return LexCharConstant(Result, CurPtr);

  // C99 6.4.5: String Literals.
  case '"':
    // Notify MIOpt that we read a non-whitespace/non-comment token.
    MIOpt.ReadToken();
    return LexStringLiteral(Result, CurPtr, false);

  // C99 6.4.6: Punctuators.
  case '?':
    Result.setKind(tok::question);
    break;
  case '[':
    Result.setKind(tok::l_square);
    break;
  case ']':
    Result.setKind(tok::r_square);
    break;
  case '(':
    Result.setKind(tok::l_paren);
    break;
  case ')':
    Result.setKind(tok::r_paren);
    break;
  case '{':
    Result.setKind(tok::l_brace);
    break;
  case '}':
    Result.setKind(tok::r_brace);
    break;
  case '.':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Char >= '0' && Char <= '9') {
      // Notify MIOpt that we read a non-whitespace/non-comment token.
      MIOpt.ReadToken();

      return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
    } else if (Features.CPlusPlus && Char == '*') {
      Result.setKind(tok::periodstar);
      CurPtr += SizeTmp;
    } else if (Char == '.' &&
               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') {
      Result.setKind(tok::ellipsis);
      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                           SizeTmp2, Result);
    } else {
      Result.setKind(tok::period);
    }
    break;
  case '&':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Char == '&') {
      Result.setKind(tok::ampamp);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Char == '=') {
      Result.setKind(tok::ampequal);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
      Result.setKind(tok::amp);
    }
    break;
  case '*':
    if (getCharAndSize(CurPtr, SizeTmp) == '=') {
      Result.setKind(tok::starequal);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
      Result.setKind(tok::star);
    }
    break;
  case '+':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Char == '+') {
      Result.setKind(tok::plusplus);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Char == '=') {
      Result.setKind(tok::plusequal);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
      Result.setKind(tok::plus);
    }
    break;
  case '-':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Char == '-') {
      Result.setKind(tok::minusminus);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Char == '>' && Features.CPlusPlus &&
               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') {
      Result.setKind(tok::arrowstar);  // C++ ->*
      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                           SizeTmp2, Result);
    } else if (Char == '>') {
      Result.setKind(tok::arrow);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Char == '=') {
      Result.setKind(tok::minusequal);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
      Result.setKind(tok::minus);
    }
    break;
  case '~':
    Result.setKind(tok::tilde);
    break;
  case '!':
    if (getCharAndSize(CurPtr, SizeTmp) == '=') {
      Result.setKind(tok::exclaimequal);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
      Result.setKind(tok::exclaim);
    }
    break;
  case '/':
    // 6.4.9: Comments
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Char == '/') {         // BCPL comment.
      if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) {
        // It is common for the tokens immediately after a // comment to be
        // whitespace (indentation for the next line).  Instead of going through
        // the big switch, handle it efficiently now.
        goto SkipIgnoredUnits;
      }
      return; // KeepCommentMode
    } else if (Char == '*') {  // /**/ comment.
      if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
        goto LexNextToken;   // GCC isn't tail call eliminating.
      return; // KeepCommentMode
    } else if (Char == '=') {
      Result.setKind(tok::slashequal);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
      Result.setKind(tok::slash);
    }
    break;
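  // C99 6.4.6p3 digraphs: '%:' is '#', '%>' is '}', and '%:%:' is '##'.  That
  // is why a line such as "%:include <stdio.h>" is treated as an #include
  // directive by the '%:' path below, mirroring the '#' case later in this
  // switch.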
  case '%':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Char == '=') {
      Result.setKind(tok::percentequal);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Features.Digraphs && Char == '>') {
      Result.setKind(tok::r_brace);    // '%>' -> '}'
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Features.Digraphs && Char == ':') {
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
      Char = getCharAndSize(CurPtr, SizeTmp);
      if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
        Result.setKind(tok::hashhash);   // '%:%:' -> '##'
        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                             SizeTmp2, Result);
      } else if (Char == '@' && Features.Microsoft) {  // %:@ -> #@ -> Charize
        Result.setKind(tok::hashat);
        CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
        Diag(BufferPtr, diag::charize_microsoft_ext);
      } else {
        Result.setKind(tok::hash);       // '%:' -> '#'

        // We parsed a # character.  If this occurs at the start of the line,
        // it's actually the start of a preprocessing directive.  Call back to
        // the preprocessor to handle it.
        // FIXME: -fpreprocessed mode??
        if (Result.isAtStartOfLine() && !LexingRawMode) {
          BufferPtr = CurPtr;
          PP.HandleDirective(Result);

          // As an optimization, if the preprocessor didn't switch lexers, tail
          // recurse.
          if (PP.isCurrentLexer(this)) {
            // Start a new token.  If this is a #include or something, the PP may
            // want us starting at the beginning of the line again.  If so, set
            // the StartOfLine flag.
            if (IsAtStartOfLine) {
              Result.setFlag(Token::StartOfLine);
              IsAtStartOfLine = false;
            }
            goto LexNextToken;   // GCC isn't tail call eliminating.
          }

          return PP.Lex(Result);
        }
      }
    } else {
      Result.setKind(tok::percent);
    }
    break;
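  // ParsingFilename is set while the filename of an #include-style directive
  // is being lexed, so in that mode '<' introduces an angled header-name such
  // as <stdio.h> rather than a less-than operator; that is the first check in
  // the '<' case below.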
  case '<':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (ParsingFilename) {
      return LexAngledStringLiteral(Result, CurPtr+SizeTmp);
    } else if (Char == '<' &&
               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
      Result.setKind(tok::lesslessequal);
      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                           SizeTmp2, Result);
    } else if (Char == '<') {
      Result.setKind(tok::lessless);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Char == '=') {
      Result.setKind(tok::lessequal);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Features.Digraphs && Char == ':') {
      Result.setKind(tok::l_square); // '<:' -> '['
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Features.Digraphs && Char == '%') {
      Result.setKind(tok::l_brace); // '<%' -> '{'
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
      Result.setKind(tok::less);
    }
    break;
  case '>':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Char == '=') {
      Result.setKind(tok::greaterequal);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Char == '>' &&
               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
      Result.setKind(tok::greatergreaterequal);
      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                           SizeTmp2, Result);
    } else if (Char == '>') {
      Result.setKind(tok::greatergreater);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
      Result.setKind(tok::greater);
    }
    break;
  case '^':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Char == '=') {
      Result.setKind(tok::caretequal);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
      Result.setKind(tok::caret);
    }
    break;
  case '|':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Char == '=') {
      Result.setKind(tok::pipeequal);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Char == '|') {
      Result.setKind(tok::pipepipe);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
      Result.setKind(tok::pipe);
    }
    break;
  case ':':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Features.Digraphs && Char == '>') {
      Result.setKind(tok::r_square); // ':>' -> ']'
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Features.CPlusPlus && Char == ':') {
      Result.setKind(tok::coloncolon);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
      Result.setKind(tok::colon);
    }
    break;
  case ';':
    Result.setKind(tok::semi);
    break;
  case '=':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Char == '=') {
      Result.setKind(tok::equalequal);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
      Result.setKind(tok::equal);
    }
    break;
  case ',':
    Result.setKind(tok::comma);
    break;
  case '#':
    Char = getCharAndSize(CurPtr, SizeTmp);
    if (Char == '#') {
      Result.setKind(tok::hashhash);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else if (Char == '@' && Features.Microsoft) {  // #@ -> Charize
      Result.setKind(tok::hashat);
      Diag(BufferPtr, diag::charize_microsoft_ext);
      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
    } else {
      Result.setKind(tok::hash);
      // We parsed a # character.  If this occurs at the start of the line,
      // it's actually the start of a preprocessing directive.  Call back to
      // the preprocessor to handle it.
      // FIXME: -fpreprocessed mode??
      if (Result.isAtStartOfLine() && !LexingRawMode) {
        BufferPtr = CurPtr;
        PP.HandleDirective(Result);

        // As an optimization, if the preprocessor didn't switch lexers, tail
        // recurse.
        if (PP.isCurrentLexer(this)) {
          // Start a new token.  If this is a #include or something, the PP may
          // want us starting at the beginning of the line again.  If so, set
          // the StartOfLine flag.
          if (IsAtStartOfLine) {
            Result.setFlag(Token::StartOfLine);
            IsAtStartOfLine = false;
          }
          goto LexNextToken;   // GCC isn't tail call eliminating.
        }
        return PP.Lex(Result);
      }
    }
    break;

  case '\\':
    // FIXME: UCN's.
    // FALL THROUGH.
  default:
    // Objective C support.
    if (CurPtr[-1] == '@' && Features.ObjC1) {
      Result.setKind(tok::at);
      break;
    } else if (CurPtr[-1] == '$' && Features.DollarIdents) {// $ in identifiers.
      Diag(CurPtr-1, diag::ext_dollar_in_identifier);
      // Notify MIOpt that we read a non-whitespace/non-comment token.
      MIOpt.ReadToken();
      return LexIdentifier(Result, CurPtr);
    }

    Result.setKind(tok::unknown);
    break;
  }

  // Notify MIOpt that we read a non-whitespace/non-comment token.
  MIOpt.ReadToken();

  // Update the location of the token as well as BufferPtr.
  FormTokenWithChars(Result, CurPtr);
}