blob: 4c3f9e4258e5deac04005e90598f332b4508b451 [file] [log] [blame]
//===- Token.cpp - MLIR Token Implementation ------------------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements the Token class for the MLIR textual form.
//
//===----------------------------------------------------------------------===//
21
22#include "Token.h"
Chris Lattner0497c4b2018-08-15 09:09:54 -070023#include "llvm/ADT/StringExtras.h"
Chris Lattnere79379a2018-06-22 10:39:19 -070024using namespace mlir;
25using llvm::SMLoc;
26using llvm::SMRange;
27
28SMLoc Token::getLoc() const {
29 return SMLoc::getFromPointer(spelling.data());
30}
31
32SMLoc Token::getEndLoc() const {
33 return SMLoc::getFromPointer(spelling.data() + spelling.size());
34}
35
36SMRange Token::getLocRange() const {
37 return SMRange(getLoc(), getEndLoc());
38}
Chris Lattnerbb8fafc2018-06-22 15:52:02 -070039
40/// For an integer token, return its value as an unsigned. If it doesn't fit,
41/// return None.
Chris Lattnered65a732018-06-28 20:45:33 -070042Optional<unsigned> Token::getUnsignedIntegerValue() const {
Chris Lattnerbb8fafc2018-06-22 15:52:02 -070043 bool isHex = spelling.size() > 1 && spelling[1] == 'x';
44
45 unsigned result = 0;
46 if (spelling.getAsInteger(isHex ? 0 : 10, result))
47 return None;
48 return result;
49}
Chris Lattnered65a732018-06-28 20:45:33 -070050
Chris Lattner7121b802018-07-04 20:45:39 -070051/// For an integer token, return its value as a uint64_t. If it doesn't fit,
52/// return None.
53Optional<uint64_t> Token::getUInt64IntegerValue() const {
54 bool isHex = spelling.size() > 1 && spelling[1] == 'x';
55
56 uint64_t result = 0;
57 if (spelling.getAsInteger(isHex ? 0 : 10, result))
58 return None;
59 return result;
60}
61
Jacques Pienaar84491092018-07-31 17:15:15 -070062/// For a floatliteral, return its value as a double. Return None if the value
63/// underflows or overflows.
64Optional<double> Token::getFloatingPointValue() const {
65 double result = 0;
66 if (spelling.getAsDouble(result))
67 return None;
68 return result;
69}
Chris Lattner7121b802018-07-04 20:45:39 -070070
Chris Lattnerf958bbe2018-06-29 22:08:05 -070071/// For an inttype token, return its bitwidth.
72Optional<unsigned> Token::getIntTypeBitwidth() const {
Chris Lattner7121b802018-07-04 20:45:39 -070073 unsigned result = 0;
Chris Lattnerf958bbe2018-06-29 22:08:05 -070074 if (spelling[1] == '0' ||
75 spelling.drop_front().getAsInteger(10, result) ||
76 // Arbitrary but large limit on bitwidth.
77 result > 4096 || result == 0)
78 return None;
79 return result;
80}
81
Chris Lattnered65a732018-06-28 20:45:33 -070082/// Given a 'string' token, return its value, including removing the quote
Chris Lattner0497c4b2018-08-15 09:09:54 -070083/// characters and unescaping the contents of the string. The lexer has already
84/// verified that this token is valid.
Chris Lattnered65a732018-06-28 20:45:33 -070085std::string Token::getStringValue() const {
Chris Lattner0497c4b2018-08-15 09:09:54 -070086 assert(getKind() == string);
James Molloy3cdb8aa2018-08-14 01:16:45 -070087 // Start by dropping the quotes.
Chris Lattner0497c4b2018-08-15 09:09:54 -070088 StringRef bytes = getSpelling().drop_front().drop_back();
89
90 std::string result;
91 result.reserve(bytes.size());
92 for (unsigned i = 0, e = bytes.size(); i != e;) {
93 auto c = bytes[i++];
94 if (c != '\\') {
95 result.push_back(c);
96 continue;
97 }
98
99 assert(i + 1 < e && "invalid string should be caught by lexer");
100 auto c1 = bytes[i++];
101 switch (c1) {
102 case '"':
103 case '\\':
104 result.push_back(c1);
105 continue;
106 case 'n':
107 result.push_back('\n');
108 continue;
109 case 't':
110 result.push_back('\t');
111 continue;
112 default:
113 break;
114 }
115
James Molloy14ad73f2018-08-16 08:43:55 -0700116 assert(i + 1 <= e && "invalid string should be caught by lexer");
Chris Lattner0497c4b2018-08-15 09:09:54 -0700117 auto c2 = bytes[i++];
118
119 assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
120 result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
121 }
122
123 return result;
Chris Lattnered65a732018-06-28 20:45:33 -0700124}
Chris Lattner8da0c282018-06-29 11:15:56 -0700125
Chris Lattner6119d382018-07-20 18:41:34 -0700126/// Given a hash_identifier token like #123, try to parse the number out of
127/// the identifier, returning None if it is a named identifier like #x or
128/// if the integer doesn't fit.
129Optional<unsigned> Token::getHashIdentifierNumber() const {
130 assert(getKind() == hash_identifier);
131 unsigned result = 0;
132 if (spelling.drop_front().getAsInteger(10, result))
133 return None;
134 return result;
135}
Chris Lattner8da0c282018-06-29 11:15:56 -0700136
137/// Given a punctuation or keyword token kind, return the spelling of the
138/// token as a string. Warning: This will abort on markers, identifiers and
139/// literal tokens since they have no fixed spelling.
140StringRef Token::getTokenSpelling(Kind kind) {
Chris Lattner7121b802018-07-04 20:45:39 -0700141 switch (kind) {
Jacques Pienaar16916002018-07-07 15:48:05 -0700142 default: llvm_unreachable("This token kind has no fixed spelling");
Chris Lattner8da0c282018-06-29 11:15:56 -0700143#define TOK_PUNCTUATION(NAME, SPELLING) case NAME: return SPELLING;
Uday Bondhugulafaf37dd2018-06-29 18:09:29 -0700144#define TOK_OPERATOR(NAME, SPELLING) case NAME: return SPELLING;
Chris Lattner8da0c282018-06-29 11:15:56 -0700145#define TOK_KEYWORD(SPELLING) case kw_##SPELLING: return #SPELLING;
146#include "TokenKinds.def"
Chris Lattner7121b802018-07-04 20:45:39 -0700147 }
148}
149
150/// Return true if this is one of the keyword token kinds (e.g. kw_if).
151bool Token::isKeyword() const {
152 switch (kind) {
153 default: return false;
154#define TOK_KEYWORD(SPELLING) case kw_##SPELLING: return true;
155#include "TokenKinds.def"
156 }
Chris Lattner8da0c282018-06-29 11:15:56 -0700157}