Push location information more tightly into the IR, providing space for every
operation and statement to have a location, and make it so a location is
required to be specified whenever you make one (though a null location is still
allowed). This is to encourage compiler authors to propagate loc info
properly, allowing our failability story to work well.
This is still a WIP - it isn't clear if we want to continue abusing Attribute
for location information, or whether we should introduce a new class heirarchy
to do so. This is good step along the way, and unblocks some of the tf/xla
work that builds upon it.
PiperOrigin-RevId: 210001406
diff --git a/lib/Parser/Parser.cpp b/lib/Parser/Parser.cpp
index de82e45..829769e 100644
--- a/lib/Parser/Parser.cpp
+++ b/lib/Parser/Parser.cpp
@@ -120,6 +120,10 @@
const Token &getToken() const { return state.curToken; }
StringRef getTokenSpelling() const { return state.curToken.getSpelling(); }
+ /// Encode the specified source location information into an attribute for
+ /// attachment to the IR.
+ Attribute *getEncodedSourceLocation(llvm::SMLoc loc);
+
/// Emit an error and return failure.
ParseResult emitError(const Twine &message) {
return emitError(state.curToken.getLoc(), message);
@@ -203,6 +207,20 @@
// Helper methods.
//===----------------------------------------------------------------------===//
+/// Encode the specified source location information into an attribute for
+/// attachment to the IR.
+Attribute *Parser::getEncodedSourceLocation(llvm::SMLoc loc) {
+ // TODO(clattner): Switch to an more structured form that includes
+ // file/line/column instead of just byte offset in the file. This will
+ // eliminate this block of low level code poking at the SourceMgr directly.
+ auto &sourceMgr = getSourceMgr();
+ auto fileID = sourceMgr.FindBufferContainingLoc(loc);
+
+ auto *srcBuffer = sourceMgr.getMemoryBuffer(fileID);
+ unsigned locationEncoding = loc.getPointer() - srcBuffer->getBufferStart();
+ return builder.getIntegerAttr(locationEncoding);
+}
+
ParseResult Parser::emitError(SMLoc loc, const Twine &message) {
// If we hit a parse error in response to a lexer error, then the lexer
// already reported the error.
@@ -1367,8 +1385,11 @@
// We create these placeholders as having an empty name, which we know cannot
// be created through normal user input, allowing us to distinguish them.
auto name = Identifier::get("placeholder", getContext());
- auto *inst = OperationInst::create(name, /*operands*/ {}, type, /*attrs*/ {},
- getContext());
+ auto *inst =
+ // FIXME(clattner): encode the location into the placeholder instead of
+ // into the forwardReferencePlaceholders map!
+ OperationInst::create(/*location=*/nullptr, name, /*operands=*/{}, type,
+ /*attrs=*/{}, getContext());
forwardReferencePlaceholders[inst->getResult(0)] = loc;
return inst->getResult(0);
}
@@ -1606,19 +1627,6 @@
if (!op)
return ParseFailure;
- // Apply location information to the instruction.
- // TODO(clattner): make this more principled. We shouldn't overwrite existing
- // location info, we should use a better serialized form, and we shouldn't
- // be using the :location attribute. This is also pretty inefficient.
- {
- auto &sourceMgr = getSourceMgr();
- auto fileID = sourceMgr.FindBufferContainingLoc(loc);
- auto *srcBuffer = sourceMgr.getMemoryBuffer(fileID);
- unsigned locationEncoding = loc.getPointer() - srcBuffer->getBufferStart();
- op->setAttr(builder.getIdentifier(":location"),
- builder.getIntegerAttr(locationEncoding));
- }
-
// We just parsed an operation. If it is a recognized one, verify that it
// is structurally as we expect. If not, produce an error with a reasonable
// source location.
@@ -1642,13 +1650,17 @@
Operation *FunctionParser::parseVerboseOperation(
const CreateOperationFunction &createOpFunc) {
+
+ // Get location information for the operation.
+ auto *srcLocation = getEncodedSourceLocation(getToken().getLoc());
+
auto name = getToken().getStringValue();
if (name.empty())
return (emitError("empty operation name is invalid"), nullptr);
consumeToken(Token::string);
- OperationState result(builder.getContext(), name);
+ OperationState result(builder.getContext(), srcLocation, name);
// Parse the operand list.
SmallVector<SSAUseInfo, 8> operandInfos;
@@ -1915,8 +1927,11 @@
llvm::PrettyStackTraceFormat fmt("MLIR Parser: custom op parser '%s'",
opNameStr.c_str());
+ // Get location information for the operation.
+ auto *srcLocation = getEncodedSourceLocation(opLoc);
+
// Have the op implementation take a crack and parsing this.
- OperationState opState(builder.getContext(), opName);
+ OperationState opState(builder.getContext(), srcLocation, opName);
if (opDefinition->parseAssembly(&opAsmParser, &opState))
return nullptr;
@@ -2100,6 +2115,8 @@
/// terminator-stmt ::= `return` ssa-use-and-type-list?
///
TerminatorInst *CFGFunctionParser::parseTerminator() {
+ auto loc = getToken().getLoc();
+
switch (getToken().getKind()) {
default:
return (emitError("expected terminator at end of basic block"), nullptr);
@@ -2111,7 +2128,7 @@
SmallVector<CFGValue *, 8> operands;
if (parseOptionalSSAUseAndTypeList(operands, /*isParenthesized*/ false))
return nullptr;
- return builder.createReturnInst(operands);
+ return builder.createReturnInst(getEncodedSourceLocation(loc), operands);
}
case Token::kw_br: {
@@ -2120,7 +2137,8 @@
SmallVector<CFGValue *, 4> values;
if (parseBranchBlockAndUseList(destBB, values))
return nullptr;
- auto branch = builder.createBranchInst(destBB);
+ auto branch =
+ builder.createBranchInst(getEncodedSourceLocation(loc), destBB);
branch->addOperands(values);
return branch;
}
@@ -2149,7 +2167,8 @@
if (parseBranchBlockAndUseList(falseBlock, falseOperands))
return nullptr;
- auto branch = builder.createCondBranchInst(cast<CFGValue>(cond), trueBlock,
+ auto branch = builder.createCondBranchInst(getEncodedSourceLocation(loc),
+ cast<CFGValue>(cond), trueBlock,
falseBlock);
branch->addTrueOperands(trueOperands);
branch->addFalseOperands(falseOperands);
@@ -2240,7 +2259,8 @@
}
// Create for statement.
- ForStmt *forStmt = builder.createFor(lowerBound, upperBound, step);
+ ForStmt *forStmt = builder.createFor(getEncodedSourceLocation(loc),
+ lowerBound, upperBound, step);
// Create SSA value definition for the induction variable.
if (addDefinition({inductionVariableName, 0, loc}, forStmt))
@@ -2394,6 +2414,7 @@
/// | ml-if-head `else` `{` ml-stmt* `}`
///
ParseResult MLFunctionParser::parseIfStmt() {
+ auto loc = getToken().getLoc();
consumeToken(Token::kw_if);
if (parseToken(Token::l_paren, "expected '('"))
@@ -2406,7 +2427,7 @@
if (parseToken(Token::r_paren, "expected ')'"))
return ParseFailure;
- IfStmt *ifStmt = builder.createIf(condition);
+ IfStmt *ifStmt = builder.createIf(getEncodedSourceLocation(loc), condition);
IfClause *thenClause = ifStmt->getThen();
// When parsing of an if statement body fails, the IR contains