blob: 27798be618e7f13199163ef1749516f74695a07d [file] [log] [blame]
//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implement the Lexer for .ll files.
//
//===----------------------------------------------------------------------===//
13
14#include "LLLexer.h"
15#include "ParserInternals.h"
16#include "llvm/Support/MemoryBuffer.h"
17
18#include <list>
19#include "llvmAsmParser.h"
20using namespace llvm;
21
//===----------------------------------------------------------------------===//
// Helper functions.
//===----------------------------------------------------------------------===//
25
26// atoull - Convert an ascii string of decimal digits into the unsigned long
27// long representation... this does not have to do input error checking,
28// because we know that the input will be matched by a suitable regex...
29//
30static uint64_t atoull(const char *Buffer, const char *End) {
31 uint64_t Result = 0;
32 for (; Buffer != End; Buffer++) {
33 uint64_t OldRes = Result;
34 Result *= 10;
35 Result += *Buffer-'0';
36 if (Result < OldRes) { // Uh, oh, overflow detected!!!
37 GenerateError("constant bigger than 64 bits detected!");
38 return 0;
39 }
40 }
41 return Result;
42}
43
44static uint64_t HexIntToVal(const char *Buffer, const char *End) {
45 uint64_t Result = 0;
46 for (; Buffer != End; ++Buffer) {
47 uint64_t OldRes = Result;
48 Result *= 16;
49 char C = *Buffer;
50 if (C >= '0' && C <= '9')
51 Result += C-'0';
52 else if (C >= 'A' && C <= 'F')
53 Result += C-'A'+10;
54 else if (C >= 'a' && C <= 'f')
55 Result += C-'a'+10;
56
57 if (Result < OldRes) { // Uh, oh, overflow detected!!!
58 GenerateError("constant bigger than 64 bits detected!");
59 return 0;
60 }
61 }
62 return Result;
63}
64
65// HexToFP - Convert the ascii string in hexadecimal format to the floating
66// point representation of it.
67//
68static double HexToFP(const char *Buffer, const char *End) {
69 return BitsToDouble(HexIntToVal(Buffer, End)); // Cast Hex constant to double
70}
71
72static void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]){
73 Pair[0] = 0;
74 for (int i=0; i<16; i++, Buffer++) {
75 assert(Buffer != End);
76 Pair[0] *= 16;
77 char C = *Buffer;
78 if (C >= '0' && C <= '9')
79 Pair[0] += C-'0';
80 else if (C >= 'A' && C <= 'F')
81 Pair[0] += C-'A'+10;
82 else if (C >= 'a' && C <= 'f')
83 Pair[0] += C-'a'+10;
84 }
85 Pair[1] = 0;
86 for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
87 Pair[1] *= 16;
88 char C = *Buffer;
89 if (C >= '0' && C <= '9')
90 Pair[1] += C-'0';
91 else if (C >= 'A' && C <= 'F')
92 Pair[1] += C-'A'+10;
93 else if (C >= 'a' && C <= 'f')
94 Pair[1] += C-'a'+10;
95 }
Chris Lattnerd343c6b2007-11-18 18:25:18 +000096 if (Buffer != End)
Chris Lattner8e3a8e02007-11-18 08:46:26 +000097 GenerateError("constant bigger than 128 bits detected!");
98}
99
// UnEscapeLexed - Rewrite Str in place, collapsing escape sequences:
//   - two consecutive backslashes become a single backslash;
//   - a backslash followed by two hex digits becomes the byte with that value.
// A backslash followed by anything else is copied through unchanged.  The
// string is shortened to the length of the unescaped result.
static void UnEscapeLexed(std::string &Str) {
  if (Str.empty()) return;

  char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
  char *BOut = Buffer;
  for (char *BIn = Buffer; BIn != EndBuffer; ) {
    if (BIn[0] != '\\') {
      *BOut++ = *BIn++;
      continue;
    }

    // Compare remaining lengths instead of forming EndBuffer-1 / EndBuffer-2,
    // which could point before the start of a very short string.
    if (EndBuffer-BIn >= 2 && BIn[1] == '\\') {
      *BOut++ = '\\';                        // Two \ becomes one
      BIn += 2;
    } else if (EndBuffer-BIn >= 3 &&
               // The <cctype> predicates require an unsigned char value;
               // passing a negative plain char is undefined behavior.
               isxdigit((unsigned char)BIn[1]) &&
               isxdigit((unsigned char)BIn[2])) {
      // Convert through a local buffer rather than temporarily writing a
      // terminator into the string being rewritten.
      const char Hex[3] = { BIn[1], BIn[2], 0 };
      *BOut++ = (char)strtol(Hex, 0, 16);    // Convert to number
      BIn += 3;                              // Skip over handled chars
    } else {
      *BOut++ = *BIn++;
    }
  }
  Str.resize(BOut-Buffer);
}
127
/// isLabelChar - Return true for [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  // isalnum requires a value representable as unsigned char; passing a
  // negative plain char (any byte >= 0x80 where char is signed) is undefined.
  return isalnum((unsigned char)C) ||
         C == '-' || C == '$' || C == '.' || C == '_';
}
132
133
134/// isLabelTail - Return true if this pointer points to a valid end of a label.
135static const char *isLabelTail(const char *CurPtr) {
136 while (1) {
137 if (CurPtr[0] == ':') return CurPtr+1;
138 if (!isLabelChar(CurPtr[0])) return 0;
139 ++CurPtr;
140 }
141}
142
143
144
//===----------------------------------------------------------------------===//
// Lexer definition.
//===----------------------------------------------------------------------===//
148
// FIXME: REMOVE THIS.
// Token codes returned by the lexer for end of input and for a lexical error.
#define YYEOF   0
#define YYERROR (-2)
152
153LLLexer::LLLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) {
154 CurPtr = CurBuf->getBufferStart();
155}
156
157std::string LLLexer::getFilename() const {
158 return CurBuf->getBufferIdentifier();
159}
160
161int LLLexer::getNextChar() {
162 char CurChar = *CurPtr++;
163 switch (CurChar) {
164 default: return (unsigned char)CurChar;
165 case 0:
166 // A nul character in the stream is either the end of the current buffer or
167 // a random nul in the file. Disambiguate that here.
168 if (CurPtr-1 != CurBuf->getBufferEnd())
169 return 0; // Just whitespace.
170
171 // Otherwise, return end of file.
172 --CurPtr; // Another call to lex will return EOF again.
173 return EOF;
174 case '\n':
175 case '\r':
176 // Handle the newline character by ignoring it and incrementing the line
177 // count. However, be careful about 'dos style' files with \n\r in them.
178 // Only treat a \n\r or \r\n as a single line.
179 if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
180 *CurPtr != CurChar)
181 ++CurPtr; // Eat the two char newline sequence.
182
183 ++CurLineNo;
184 return '\n';
185 }
186}
187
188
189int LLLexer::LexToken() {
190 TokStart = CurPtr;
191
192 int CurChar = getNextChar();
193
194 switch (CurChar) {
195 default:
196 // Handle letters: [a-zA-Z_]
197 if (isalpha(CurChar) || CurChar == '_')
198 return LexIdentifier();
199
200 return CurChar;
201 case EOF: return YYEOF;
202 case 0:
203 case ' ':
204 case '\t':
205 case '\n':
206 case '\r':
207 // Ignore whitespace.
208 return LexToken();
209 case '+': return LexPositive();
210 case '@': return LexAt();
211 case '%': return LexPercent();
212 case '"': return LexQuote();
213 case '.':
214 if (const char *Ptr = isLabelTail(CurPtr)) {
215 CurPtr = Ptr;
216 llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1);
217 return LABELSTR;
218 }
219 if (CurPtr[0] == '.' && CurPtr[1] == '.') {
220 CurPtr += 2;
221 return DOTDOTDOT;
222 }
223 return '.';
224 case '$':
225 if (const char *Ptr = isLabelTail(CurPtr)) {
226 CurPtr = Ptr;
227 llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1);
228 return LABELSTR;
229 }
230 return '$';
231 case ';':
232 SkipLineComment();
233 return LexToken();
234 case '0': case '1': case '2': case '3': case '4':
235 case '5': case '6': case '7': case '8': case '9':
236 case '-':
237 return LexDigitOrNegative();
238 }
239}
240
241void LLLexer::SkipLineComment() {
242 while (1) {
243 if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
244 return;
245 }
246}
247
248/// LexAt - Lex all tokens that start with an @ character:
249/// AtStringConstant @\"[^\"]*\"
250/// GlobalVarName @[-a-zA-Z$._][-a-zA-Z$._0-9]*
251/// GlobalVarID @[0-9]+
252int LLLexer::LexAt() {
253 // Handle AtStringConstant: @\"[^\"]*\"
254 if (CurPtr[0] == '"') {
255 ++CurPtr;
256
257 while (1) {
258 int CurChar = getNextChar();
259
260 if (CurChar == EOF) {
261 GenerateError("End of file in global variable name");
262 return YYERROR;
263 }
264 if (CurChar == '"') {
265 llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1);
266 UnEscapeLexed(*llvmAsmlval.StrVal);
267 return ATSTRINGCONSTANT;
268 }
269 }
270 }
271
272 // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
273 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
274 CurPtr[0] == '.' || CurPtr[0] == '_') {
275 ++CurPtr;
276 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
277 CurPtr[0] == '.' || CurPtr[0] == '_')
278 ++CurPtr;
279
280 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip @
281 return GLOBALVAR;
282 }
283
284 // Handle GlobalVarID: @[0-9]+
285 if (isdigit(CurPtr[0])) {
286 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr);
287
288 uint64_t Val = atoull(TokStart+1, CurPtr);
289 if ((unsigned)Val != Val)
290 GenerateError("Invalid value number (too large)!");
291 llvmAsmlval.UIntVal = unsigned(Val);
292 return GLOBALVAL_ID;
293 }
294
295 return '@';
296}
297
298
299/// LexPercent - Lex all tokens that start with a % character:
300/// PctStringConstant %\"[^\"]*\"
301/// LocalVarName %[-a-zA-Z$._][-a-zA-Z$._0-9]*
302/// LocalVarID %[0-9]+
303int LLLexer::LexPercent() {
304 // Handle PctStringConstant: %\"[^\"]*\"
305 if (CurPtr[0] == '"') {
306 ++CurPtr;
307
308 while (1) {
309 int CurChar = getNextChar();
310
311 if (CurChar == EOF) {
312 GenerateError("End of file in local variable name");
313 return YYERROR;
314 }
315 if (CurChar == '"') {
316 llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1);
317 UnEscapeLexed(*llvmAsmlval.StrVal);
318 return PCTSTRINGCONSTANT;
319 }
320 }
321 }
322
323 // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
324 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
325 CurPtr[0] == '.' || CurPtr[0] == '_') {
326 ++CurPtr;
327 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
328 CurPtr[0] == '.' || CurPtr[0] == '_')
329 ++CurPtr;
330
331 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip %
332 return LOCALVAR;
333 }
334
335 // Handle LocalVarID: %[0-9]+
336 if (isdigit(CurPtr[0])) {
337 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr);
338
339 uint64_t Val = atoull(TokStart+1, CurPtr);
340 if ((unsigned)Val != Val)
341 GenerateError("Invalid value number (too large)!");
342 llvmAsmlval.UIntVal = unsigned(Val);
343 return LOCALVAL_ID;
344 }
345
346 return '%';
347}
348
349/// LexQuote - Lex all tokens that start with a " character:
350/// QuoteLabel "[^"]+":
351/// StringConstant "[^"]*"
352int LLLexer::LexQuote() {
353 while (1) {
354 int CurChar = getNextChar();
355
356 if (CurChar == EOF) {
357 GenerateError("End of file in quoted string");
358 return YYERROR;
359 }
360
361 if (CurChar != '"') continue;
362
363 if (CurPtr[0] != ':') {
364 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-1);
365 UnEscapeLexed(*llvmAsmlval.StrVal);
366 return STRINGCONSTANT;
367 }
368
369 ++CurPtr;
370 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-2);
371 UnEscapeLexed(*llvmAsmlval.StrVal);
372 return LABELSTR;
373 }
374}
375
/// JustWhitespaceNewLine - Return true if only spaces and tabs separate Ptr
/// from the next newline character.  On success Ptr is advanced to that
/// newline character; on failure Ptr is left untouched.
static bool JustWhitespaceNewLine(const char *&Ptr) {
  const char *Scan = Ptr;
  for (; *Scan == ' ' || *Scan == '\t'; ++Scan)
    ;

  if (*Scan != '\n' && *Scan != '\r')
    return false;

  Ptr = Scan;
  return true;
}
386
387
388/// LexIdentifier: Handle several related productions:
389/// Label [-a-zA-Z$._0-9]+:
390/// IntegerType i[0-9]+
391/// Keyword sdiv, float, ...
392/// HexIntConstant [us]0x[0-9A-Fa-f]+
393int LLLexer::LexIdentifier() {
394 const char *StartChar = CurPtr;
395 const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
396 const char *KeywordEnd = 0;
397
398 for (; isLabelChar(*CurPtr); ++CurPtr) {
399 // If we decide this is an integer, remember the end of the sequence.
400 if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr;
401 if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr;
402 }
403
404 // If we stopped due to a colon, this really is a label.
405 if (*CurPtr == ':') {
406 llvmAsmlval.StrVal = new std::string(StartChar-1, CurPtr++);
407 return LABELSTR;
408 }
409
410 // Otherwise, this wasn't a label. If this was valid as an integer type,
411 // return it.
412 if (IntEnd == 0) IntEnd = CurPtr;
413 if (IntEnd != StartChar) {
414 CurPtr = IntEnd;
415 uint64_t NumBits = atoull(StartChar, CurPtr);
416 if (NumBits < IntegerType::MIN_INT_BITS ||
417 NumBits > IntegerType::MAX_INT_BITS) {
418 GenerateError("Bitwidth for integer type out of range!");
419 return YYERROR;
420 }
421 const Type* Ty = IntegerType::get(NumBits);
422 llvmAsmlval.PrimType = Ty;
423 return INTTYPE;
424 }
425
426 // Otherwise, this was a letter sequence. See which keyword this is.
427 if (KeywordEnd == 0) KeywordEnd = CurPtr;
428 CurPtr = KeywordEnd;
429 --StartChar;
430 unsigned Len = CurPtr-StartChar;
431#define KEYWORD(STR, TOK) \
432 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) return TOK;
433
434 KEYWORD("begin", BEGINTOK);
435 KEYWORD("end", ENDTOK);
436 KEYWORD("true", TRUETOK);
437 KEYWORD("false", FALSETOK);
438 KEYWORD("declare", DECLARE);
439 KEYWORD("define", DEFINE);
440 KEYWORD("global", GLOBAL);
441 KEYWORD("constant", CONSTANT);
442
443 KEYWORD("internal", INTERNAL);
444 KEYWORD("linkonce", LINKONCE);
445 KEYWORD("weak", WEAK);
446 KEYWORD("appending", APPENDING);
447 KEYWORD("dllimport", DLLIMPORT);
448 KEYWORD("dllexport", DLLEXPORT);
449 KEYWORD("hidden", HIDDEN);
450 KEYWORD("protected", PROTECTED);
451 KEYWORD("extern_weak", EXTERN_WEAK);
452 KEYWORD("external", EXTERNAL);
453 KEYWORD("thread_local", THREAD_LOCAL);
454 KEYWORD("zeroinitializer", ZEROINITIALIZER);
455 KEYWORD("undef", UNDEF);
456 KEYWORD("null", NULL_TOK);
457 KEYWORD("to", TO);
458 KEYWORD("tail", TAIL);
459 KEYWORD("target", TARGET);
460 KEYWORD("triple", TRIPLE);
461 KEYWORD("deplibs", DEPLIBS);
462 KEYWORD("datalayout", DATALAYOUT);
463 KEYWORD("volatile", VOLATILE);
464 KEYWORD("align", ALIGN);
465 KEYWORD("section", SECTION);
466 KEYWORD("alias", ALIAS);
467 KEYWORD("module", MODULE);
468 KEYWORD("asm", ASM_TOK);
469 KEYWORD("sideeffect", SIDEEFFECT);
470
471 KEYWORD("cc", CC_TOK);
472 KEYWORD("ccc", CCC_TOK);
473 KEYWORD("fastcc", FASTCC_TOK);
474 KEYWORD("coldcc", COLDCC_TOK);
475 KEYWORD("x86_stdcallcc", X86_STDCALLCC_TOK);
476 KEYWORD("x86_fastcallcc", X86_FASTCALLCC_TOK);
477
478 KEYWORD("signext", SIGNEXT);
479 KEYWORD("zeroext", ZEROEXT);
480 KEYWORD("inreg", INREG);
481 KEYWORD("sret", SRET);
482 KEYWORD("nounwind", NOUNWIND);
483 KEYWORD("noreturn", NORETURN);
484 KEYWORD("noalias", NOALIAS);
485 KEYWORD("byval", BYVAL);
486 KEYWORD("nest", NEST);
487 KEYWORD("pure", PURE);
488 KEYWORD("const", CONST);
489
490 KEYWORD("type", TYPE);
491 KEYWORD("opaque", OPAQUE);
492
493 KEYWORD("eq" , EQ);
494 KEYWORD("ne" , NE);
495 KEYWORD("slt", SLT);
496 KEYWORD("sgt", SGT);
497 KEYWORD("sle", SLE);
498 KEYWORD("sge", SGE);
499 KEYWORD("ult", ULT);
500 KEYWORD("ugt", UGT);
501 KEYWORD("ule", ULE);
502 KEYWORD("uge", UGE);
503 KEYWORD("oeq", OEQ);
504 KEYWORD("one", ONE);
505 KEYWORD("olt", OLT);
506 KEYWORD("ogt", OGT);
507 KEYWORD("ole", OLE);
508 KEYWORD("oge", OGE);
509 KEYWORD("ord", ORD);
510 KEYWORD("uno", UNO);
511 KEYWORD("ueq", UEQ);
512 KEYWORD("une", UNE);
513#undef KEYWORD
514
515 // Keywords for types.
516#define TYPEKEYWORD(STR, LLVMTY, TOK) \
517 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
518 llvmAsmlval.PrimType = LLVMTY; return TOK; }
519 TYPEKEYWORD("void", Type::VoidTy, VOID);
520 TYPEKEYWORD("float", Type::FloatTy, FLOAT);
521 TYPEKEYWORD("double", Type::DoubleTy, DOUBLE);
522 TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty, X86_FP80);
523 TYPEKEYWORD("fp128", Type::FP128Ty, FP128);
524 TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty, PPC_FP128);
525 TYPEKEYWORD("label", Type::LabelTy, LABEL);
526#undef TYPEKEYWORD
527
528 // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is
529 // to avoid conflicting with the sext/zext instructions, below.
530 if (Len == 4 && !memcmp(StartChar, "sext", 4)) {
531 // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
532 if (JustWhitespaceNewLine(CurPtr))
533 return SIGNEXT;
534 } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) {
535 // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
536 if (JustWhitespaceNewLine(CurPtr))
537 return ZEROEXT;
538 }
539
540 // Keywords for instructions.
541#define INSTKEYWORD(STR, type, Enum, TOK) \
542 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
543 llvmAsmlval.type = Instruction::Enum; return TOK; }
544
545 INSTKEYWORD("add", BinaryOpVal, Add, ADD);
546 INSTKEYWORD("sub", BinaryOpVal, Sub, SUB);
547 INSTKEYWORD("mul", BinaryOpVal, Mul, MUL);
548 INSTKEYWORD("udiv", BinaryOpVal, UDiv, UDIV);
549 INSTKEYWORD("sdiv", BinaryOpVal, SDiv, SDIV);
550 INSTKEYWORD("fdiv", BinaryOpVal, FDiv, FDIV);
551 INSTKEYWORD("urem", BinaryOpVal, URem, UREM);
552 INSTKEYWORD("srem", BinaryOpVal, SRem, SREM);
553 INSTKEYWORD("frem", BinaryOpVal, FRem, FREM);
554 INSTKEYWORD("shl", BinaryOpVal, Shl, SHL);
555 INSTKEYWORD("lshr", BinaryOpVal, LShr, LSHR);
556 INSTKEYWORD("ashr", BinaryOpVal, AShr, ASHR);
557 INSTKEYWORD("and", BinaryOpVal, And, AND);
558 INSTKEYWORD("or", BinaryOpVal, Or , OR );
559 INSTKEYWORD("xor", BinaryOpVal, Xor, XOR);
560 INSTKEYWORD("icmp", OtherOpVal, ICmp, ICMP);
561 INSTKEYWORD("fcmp", OtherOpVal, FCmp, FCMP);
562
563 INSTKEYWORD("phi", OtherOpVal, PHI, PHI_TOK);
564 INSTKEYWORD("call", OtherOpVal, Call, CALL);
565 INSTKEYWORD("trunc", CastOpVal, Trunc, TRUNC);
566 INSTKEYWORD("zext", CastOpVal, ZExt, ZEXT);
567 INSTKEYWORD("sext", CastOpVal, SExt, SEXT);
568 INSTKEYWORD("fptrunc", CastOpVal, FPTrunc, FPTRUNC);
569 INSTKEYWORD("fpext", CastOpVal, FPExt, FPEXT);
570 INSTKEYWORD("uitofp", CastOpVal, UIToFP, UITOFP);
571 INSTKEYWORD("sitofp", CastOpVal, SIToFP, SITOFP);
572 INSTKEYWORD("fptoui", CastOpVal, FPToUI, FPTOUI);
573 INSTKEYWORD("fptosi", CastOpVal, FPToSI, FPTOSI);
574 INSTKEYWORD("inttoptr", CastOpVal, IntToPtr, INTTOPTR);
575 INSTKEYWORD("ptrtoint", CastOpVal, PtrToInt, PTRTOINT);
576 INSTKEYWORD("bitcast", CastOpVal, BitCast, BITCAST);
577 INSTKEYWORD("select", OtherOpVal, Select, SELECT);
578 INSTKEYWORD("va_arg", OtherOpVal, VAArg , VAARG);
579 INSTKEYWORD("ret", TermOpVal, Ret, RET);
580 INSTKEYWORD("br", TermOpVal, Br, BR);
581 INSTKEYWORD("switch", TermOpVal, Switch, SWITCH);
582 INSTKEYWORD("invoke", TermOpVal, Invoke, INVOKE);
583 INSTKEYWORD("unwind", TermOpVal, Unwind, UNWIND);
584 INSTKEYWORD("unreachable", TermOpVal, Unreachable, UNREACHABLE);
585
586 INSTKEYWORD("malloc", MemOpVal, Malloc, MALLOC);
587 INSTKEYWORD("alloca", MemOpVal, Alloca, ALLOCA);
588 INSTKEYWORD("free", MemOpVal, Free, FREE);
589 INSTKEYWORD("load", MemOpVal, Load, LOAD);
590 INSTKEYWORD("store", MemOpVal, Store, STORE);
591 INSTKEYWORD("getelementptr", MemOpVal, GetElementPtr, GETELEMENTPTR);
592
593 INSTKEYWORD("extractelement", OtherOpVal, ExtractElement, EXTRACTELEMENT);
594 INSTKEYWORD("insertelement", OtherOpVal, InsertElement, INSERTELEMENT);
595 INSTKEYWORD("shufflevector", OtherOpVal, ShuffleVector, SHUFFLEVECTOR);
596#undef INSTKEYWORD
597
598 // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
599 // the CFE to avoid forcing it to deal with 64-bit numbers.
600 if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
601 TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
602 int len = CurPtr-TokStart-3;
603 uint32_t bits = len * 4;
604 APInt Tmp(bits, TokStart+3, len, 16);
605 uint32_t activeBits = Tmp.getActiveBits();
606 if (activeBits > 0 && activeBits < bits)
607 Tmp.trunc(activeBits);
608 if (Tmp.getBitWidth() > 64) {
609 llvmAsmlval.APIntVal = new APInt(Tmp);
610 return TokStart[0] == 's' ? ESAPINTVAL : EUAPINTVAL;
611 } else if (TokStart[0] == 's') {
612 llvmAsmlval.SInt64Val = Tmp.getSExtValue();
613 return ESINT64VAL;
614 } else {
615 llvmAsmlval.UInt64Val = Tmp.getZExtValue();
616 return EUINT64VAL;
617 }
618 }
619
Chris Lattner4ce0df62007-11-18 18:43:24 +0000620 // If this is "cc1234", return this as just "cc".
Chris Lattner8e3a8e02007-11-18 08:46:26 +0000621 if (TokStart[0] == 'c' && TokStart[1] == 'c') {
622 CurPtr = TokStart+2;
623 return CC_TOK;
624 }
625
Chris Lattner4ce0df62007-11-18 18:43:24 +0000626 // If this starts with "call", return it as CALL. This is to support old
627 // broken .ll files. FIXME: remove this with LLVM 3.0.
628 if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) {
629 CurPtr = TokStart+4;
630 llvmAsmlval.OtherOpVal = Instruction::Call;
631 return CALL;
632 }
633
Chris Lattner8e3a8e02007-11-18 08:46:26 +0000634 // Finally, if this isn't known, return just a single character.
635 CurPtr = TokStart+1;
636 return TokStart[0];
637}
638
639
640/// Lex0x: Handle productions that start with 0x, knowing that it matches and
641/// that this is not a label:
642/// HexFPConstant 0x[0-9A-Fa-f]+
643/// HexFP80Constant 0xK[0-9A-Fa-f]+
644/// HexFP128Constant 0xL[0-9A-Fa-f]+
645/// HexPPC128Constant 0xM[0-9A-Fa-f]+
646int LLLexer::Lex0x() {
647 CurPtr = TokStart + 2;
648
649 char Kind;
650 if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') {
651 Kind = *CurPtr++;
652 } else {
653 Kind = 'J';
654 }
655
656 if (!isxdigit(CurPtr[0])) {
657 // Bad token, return it as just zero.
658 CurPtr = TokStart+1;
659 return '0';
660 }
661
662 while (isxdigit(CurPtr[0]))
663 ++CurPtr;
664
665 if (Kind == 'J') {
666 // HexFPConstant - Floating point constant represented in IEEE format as a
667 // hexadecimal number for when exponential notation is not precise enough.
668 // Float and double only.
669 llvmAsmlval.FPVal = new APFloat(HexToFP(TokStart+2, CurPtr));
670 return FPVAL;
671 }
672
673 uint64_t Pair[2];
674 HexToIntPair(TokStart+3, CurPtr, Pair);
675 switch (Kind) {
676 default: assert(0 && "Unknown kind!");
677 case 'K':
678 // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
679 llvmAsmlval.FPVal = new APFloat(APInt(80, 2, Pair));
680 return FPVAL;
681 case 'L':
682 // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
683 llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair), true);
684 return FPVAL;
685 case 'M':
686 // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
687 llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair));
688 return FPVAL;
689 }
690}
691
692/// LexIdentifier: Handle several related productions:
693/// Label [-a-zA-Z$._0-9]+:
694/// NInteger -[0-9]+
695/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
696/// PInteger [0-9]+
697/// HexFPConstant 0x[0-9A-Fa-f]+
698/// HexFP80Constant 0xK[0-9A-Fa-f]+
699/// HexFP128Constant 0xL[0-9A-Fa-f]+
700/// HexPPC128Constant 0xM[0-9A-Fa-f]+
701int LLLexer::LexDigitOrNegative() {
702 // If the letter after the negative is a number, this is probably a label.
703 if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
704 // Okay, this is not a number after the -, it's probably a label.
705 if (const char *End = isLabelTail(CurPtr)) {
706 llvmAsmlval.StrVal = new std::string(TokStart, End-1);
707 CurPtr = End;
708 return LABELSTR;
709 }
710
711 return CurPtr[-1];
712 }
713
714 // At this point, it is either a label, int or fp constant.
715
716 // Skip digits, we have at least one.
717 for (; isdigit(CurPtr[0]); ++CurPtr);
718
719 // Check to see if this really is a label afterall, e.g. "-1:".
720 if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
721 if (const char *End = isLabelTail(CurPtr)) {
722 llvmAsmlval.StrVal = new std::string(TokStart, End-1);
723 CurPtr = End;
724 return LABELSTR;
725 }
726 }
727
728 // If the next character is a '.', then it is a fp value, otherwise its
729 // integer.
730 if (CurPtr[0] != '.') {
731 if (TokStart[0] == '0' && TokStart[1] == 'x')
732 return Lex0x();
733 unsigned Len = CurPtr-TokStart;
734 uint32_t numBits = ((Len * 64) / 19) + 2;
735 APInt Tmp(numBits, TokStart, Len, 10);
736 if (TokStart[0] == '-') {
737 uint32_t minBits = Tmp.getMinSignedBits();
738 if (minBits > 0 && minBits < numBits)
739 Tmp.trunc(minBits);
740 if (Tmp.getBitWidth() > 64) {
741 llvmAsmlval.APIntVal = new APInt(Tmp);
742 return ESAPINTVAL;
743 } else {
744 llvmAsmlval.SInt64Val = Tmp.getSExtValue();
745 return ESINT64VAL;
746 }
747 } else {
748 uint32_t activeBits = Tmp.getActiveBits();
749 if (activeBits > 0 && activeBits < numBits)
750 Tmp.trunc(activeBits);
751 if (Tmp.getBitWidth() > 64) {
752 llvmAsmlval.APIntVal = new APInt(Tmp);
753 return EUAPINTVAL;
754 } else {
755 llvmAsmlval.UInt64Val = Tmp.getZExtValue();
756 return EUINT64VAL;
757 }
758 }
759 }
760
761 ++CurPtr;
762
763 // Skip over [0-9]*([eE][-+]?[0-9]+)?
764 while (isdigit(CurPtr[0])) ++CurPtr;
765
766 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
767 if (isdigit(CurPtr[1]) ||
768 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
769 CurPtr += 2;
770 while (isdigit(CurPtr[0])) ++CurPtr;
771 }
772 }
773
774 llvmAsmlval.FPVal = new APFloat(atof(TokStart));
775 return FPVAL;
776}
777
778/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
779int LLLexer::LexPositive() {
780 // If the letter after the negative is a number, this is probably not a
781 // label.
782 if (!isdigit(CurPtr[0]))
783 return CurPtr[-1];
784
785 // Skip digits.
786 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr);
787
788 // At this point, we need a '.'.
789 if (CurPtr[0] != '.') {
790 CurPtr = TokStart+1;
791 return TokStart[0];
792 }
793
794 ++CurPtr;
795
796 // Skip over [0-9]*([eE][-+]?[0-9]+)?
797 while (isdigit(CurPtr[0])) ++CurPtr;
798
799 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
800 if (isdigit(CurPtr[1]) ||
801 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
802 CurPtr += 2;
803 while (isdigit(CurPtr[0])) ++CurPtr;
804 }
805 }
806
807 llvmAsmlval.FPVal = new APFloat(atof(TokStart));
808 return FPVAL;
809}
810
811
//===----------------------------------------------------------------------===//
// Define the interface to this file.
//===----------------------------------------------------------------------===//
815
816static LLLexer *TheLexer;
817
818void InitLLLexer(llvm::MemoryBuffer *MB) {
819 assert(TheLexer == 0 && "LL Lexer isn't reentrant yet");
820 TheLexer = new LLLexer(MB);
821}
822
823int llvmAsmlex() {
824 return TheLexer->LexToken();
825}
826const char *LLLgetTokenStart() { return TheLexer->getTokStart(); }
827unsigned LLLgetTokenLength() { return TheLexer->getTokLength(); }
828std::string LLLgetFilename() { return TheLexer->getFilename(); }
829unsigned LLLgetLineNo() { return TheLexer->getLineNo(); }
830
831void FreeLexer() {
832 delete TheLexer;
833 TheLexer = 0;
834}