Translate GEP instructions on structs to GEP on int8*
http://b/24142721
The X86 data layout aligns 64-bit scalars at 4-byte boundaries, while
ARM aligns them at 8-byte boundaries. To bring frontend- and
backend-generated code into agreement, this patch adds a pass that
translates getelementptr (GEP) instructions on structs into GEPs with
int8* operands and byte offsets. This pass runs only on X86 targets.
Change-Id: I961b2bc7db9f57f15126f27396db42c9deedfd43
(cherry picked from commit d2d5ee3893220cec256c829a4740a718232f84ac)
diff --git a/include/bcc/Compiler.h b/include/bcc/Compiler.h
index a0925b8..1097bad 100644
--- a/include/bcc/Compiler.h
+++ b/include/bcc/Compiler.h
@@ -68,7 +68,9 @@
kErrInvalidSource,
- kIllegalGlobalFunction
+ kIllegalGlobalFunction,
+
+ kErrInvalidTargetMachine
};
static const char *GetErrorString(enum ErrorCode pErrCode);
@@ -116,6 +118,8 @@
// all RenderScript functions. Returns error if any external function that is
// not in this whitelist is callable from the script.
enum ErrorCode screenGlobalFunctions(Script &pScript);
+
+ void translateGEPs(Script &pScript);
};
} // end namespace bcc
diff --git a/include/bcc/Config/Config.h b/include/bcc/Config/Config.h
index 1b7e4b2..8293827 100644
--- a/include/bcc/Config/Config.h
+++ b/include/bcc/Config/Config.h
@@ -84,6 +84,9 @@
// Custom DataLayout string for X86 with i64 and f64 set to match the ARM32
// alignment requirement of 64-bits.
#define X86_CUSTOM_DL_STRING "e-m:e-p:32:32-i64:64-f64:64:64-f80:32-n8:16:32-S128"
+// Default DataLayout string for X86. Present to detect future LLVM datalayout
+// changes so X86_CUSTOM_DL_STRING above can be modified appropriately.
+#define X86_DEFAULT_DL_STRING "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
#if defined(DEFAULT_ARM_CODEGEN)
#define DEFAULT_TARGET_TRIPLE_STRING DEFAULT_ARM_TRIPLE_STRING
diff --git a/include/bcc/Renderscript/RSTransforms.h b/include/bcc/Renderscript/RSTransforms.h
index 66353a3..04e8460 100644
--- a/include/bcc/Renderscript/RSTransforms.h
+++ b/include/bcc/Renderscript/RSTransforms.h
@@ -47,6 +47,8 @@
llvm::ModulePass * createRSAddDebugInfoPass();
+llvm::FunctionPass *createRSX86TranslateGEPPass();
+
} // end namespace bcc
#endif // BCC_RS_TRANSFORMS_H
diff --git a/lib/Core/Compiler.cpp b/lib/Core/Compiler.cpp
index 4062294..7b291f0 100644
--- a/lib/Core/Compiler.cpp
+++ b/lib/Core/Compiler.cpp
@@ -32,6 +32,7 @@
#include <llvm/Transforms/Vectorize.h>
#include "bcc/Assert.h"
+#include "bcc/Config/Config.h"
#include "bcc/Renderscript/RSScript.h"
#include "bcc/Renderscript/RSTransforms.h"
#include "bcc/Script.h"
@@ -74,6 +75,8 @@
return "Error loading input bitcode";
case kIllegalGlobalFunction:
return "Use of undefined external function";
+ case kErrInvalidTargetMachine:
+ return "Invalid/unexpected llvm::TargetMachine.";
}
// This assert should never be reached as the compiler verifies that the
@@ -251,6 +254,15 @@
return kErrInvalidSource;
}
+ if (getTargetMachine().getTargetTriple().getArch() == llvm::Triple::x86) {
+ // Detect and fail if TargetMachine datalayout is different than what we
+ // expect. This is to detect changes in default target layout for x86 and
+ // update X86_CUSTOM_DL_STRING in include/bcc/Config/Config.h appropriately.
+ if (dl.getStringRepresentation().compare(X86_DEFAULT_DL_STRING) != 0) {
+ return kErrInvalidTargetMachine;
+ }
+ }
+
// Sanitize module's target information.
module.setTargetTriple(getTargetMachine().getTargetTriple().str());
module.setDataLayout(getTargetMachine().createDataLayout());
@@ -453,3 +465,11 @@
return kSuccess;
}
+
+// Runs the x86 GEP-translation pass over the module held by pScript.
+void Compiler::translateGEPs(Script &pScript) {
+  llvm::legacy::PassManager gepPasses;
+  gepPasses.add(createRSX86TranslateGEPPass());
+  // Materialization done in screenGlobalFunctions above.
+  gepPasses.run(pScript.getSource().getModule());
+}
diff --git a/lib/Renderscript/Android.mk b/lib/Renderscript/Android.mk
index b406be7..e392994 100644
--- a/lib/Renderscript/Android.mk
+++ b/lib/Renderscript/Android.mk
@@ -34,7 +34,8 @@
RSScreenFunctionsPass.cpp \
RSStubsWhiteList.cpp \
RSScriptGroupFusion.cpp \
- RSX86CallConvPass.cpp
+ RSX86CallConvPass.cpp \
+ RSX86TranslateGEPPass.cpp
#=====================================================================
# Device Static Library: libbccRenderscript
diff --git a/lib/Renderscript/RSCompilerDriver.cpp b/lib/Renderscript/RSCompilerDriver.cpp
index d851187..0370e42 100644
--- a/lib/Renderscript/RSCompilerDriver.cpp
+++ b/lib/Renderscript/RSCompilerDriver.cpp
@@ -23,6 +23,7 @@
#include <llvm/Support/CommandLine.h>
#include <llvm/Support/Path.h>
#include <llvm/Support/raw_ostream.h>
+#include <llvm/Target/TargetMachine.h>
#include "bcinfo/BitcodeWrapper.h"
#include "bcc/Assert.h"
@@ -126,6 +127,19 @@
return Compiler::kErrInvalidSource;
}
+ // For (32-bit) x86, translate GEPs on structs or arrays of structs to GEPs on
+ // int8* with byte offsets. This is to ensure that layout of structs with
+ // 64-bit scalar fields matches frontend-generated code that adheres to ARM
+ // data layout.
+ //
+ // The translation is done before RenderScript runtime library is linked
+ // (during LinkRuntime below) to ensure that RenderScript-driver-provided
+ // structs (like Allocation_t) don't get forced into using the ARM layout
+ // rules.
+ if (mCompiler.getTargetMachine().getTargetTriple().getArch() == llvm::Triple::x86) {
+ mCompiler.translateGEPs(pScript);
+ }
+
//===--------------------------------------------------------------------===//
// Link RS script with Renderscript runtime.
//===--------------------------------------------------------------------===//
diff --git a/lib/Renderscript/RSX86TranslateGEPPass.cpp b/lib/Renderscript/RSX86TranslateGEPPass.cpp
new file mode 100644
index 0000000..75fc2ed
--- /dev/null
+++ b/lib/Renderscript/RSX86TranslateGEPPass.cpp
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2016, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bcc/Assert.h"
+#include "bcc/Config/Config.h"
+#include "bcc/Support/Log.h"
+#include "bcc/Renderscript/RSTransforms.h"
+
+#include <cstdlib>
+#include <vector>
+
+#include <llvm/IR/Function.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/Module.h>
+#include <llvm/Pass.h>
+#include <llvm/IR/GetElementPtrTypeIterator.h>
+
+namespace { // anonymous namespace
+
+/* This pass translates GEPs that index into structs or arrays of structs to
+ * GEPs with an int8* operand and a byte offset. This translation is done to
+ * enforce on x86 the ARM alignment rule that 64-bit scalars be 8-byte aligned
+ * for structs with such scalars.
+ *
+ * Byte offsets are computed with X86_CUSTOM_DL_STRING rather than with the
+ * module's own data layout.
+ */
+class RSX86TranslateGEPPass : public llvm::FunctionPass {
+private:
+  static char ID;
+  // Context of the function currently being processed; set by runOnFunction.
+  llvm::LLVMContext *Context = nullptr;
+  // Layout used to compute struct field offsets and element sizes.
+  const llvm::DataLayout DL;
+
+  // Walk a GEP instruction and return true if any type indexed is a struct.
+  bool GEPIndexesStructType(const llvm::GetElementPtrInst *GEP) {
+    for (llvm::gep_type_iterator GTI = gep_type_begin(GEP),
+                                 GTE = gep_type_end(GEP);
+         GTI != GTE; ++GTI) {
+      if (llvm::isa<llvm::StructType>(*GTI)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Return true if every index that selects a struct field is a ConstantInt.
+  // computeGEPOffset can only fold such indices into a byte offset.
+  bool structIndicesAreConstant(const llvm::GetElementPtrInst *GEP) {
+    for (llvm::gep_type_iterator GTI = gep_type_begin(GEP),
+                                 GTE = gep_type_end(GEP);
+         GTI != GTE; ++GTI) {
+      if (llvm::isa<llvm::StructType>(*GTI) &&
+          !llvm::isa<llvm::ConstantInt>(GTI.getOperand())) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Helper method to add two llvm::Value parameters. A null accum stands for
+  // "no offset accumulated yet", in which case incr is returned unchanged.
+  llvm::Value *incrementOffset(llvm::Value *accum, llvm::Value *incr,
+                               llvm::Instruction *InsertBefore) {
+    if (accum == nullptr)
+      return incr;
+    return llvm::BinaryOperator::CreateAdd(accum, incr, "", InsertBefore);
+  }
+
+  // Return Index as an i32 value, inserting a sign-extending or truncating
+  // cast before InsertBefore when it is an integer of another width. GEP
+  // indices are signed, and the offset arithmetic below is done in i32.
+  llvm::Value *normalizeIndex(llvm::Value *Index,
+                              llvm::Instruction *InsertBefore) {
+    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
+    llvm::Type *IndexTy = Index->getType();
+    if (IndexTy == Int32Ty || !IndexTy->isIntegerTy())
+      return Index;
+    return llvm::CastInst::CreateIntegerCast(Index, Int32Ty,
+                                             /* isSigned */ true, "",
+                                             InsertBefore);
+  }
+
+  // Compute the byte offset for a GEP from the GEP's base pointer operand.
+  // Based on visitGetElementPtrInst in llvm/lib/Transforms/Scalar/SROA.cpp.
+  // The difference is that this function handles non-constant array indices and
+  // constructs a sequence of instructions to calculate the offset. These
+  // instructions might not be the most efficient way to calculate this offset,
+  // but we rely on subsequent optimizations to do necessary fold/combine.
+  //
+  // The caller must have checked structIndicesAreConstant(GEP). All new
+  // instructions are inserted immediately before GEP.
+  llvm::Value *computeGEPOffset(llvm::GetElementPtrInst *GEP) {
+    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
+    llvm::Value *Offset = nullptr;
+
+    for (llvm::gep_type_iterator GTI = gep_type_begin(GEP),
+                                 GTE = gep_type_end(GEP);
+         GTI != GTE; ++GTI) {
+      llvm::Value *Incr = nullptr;
+      if (llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(*GTI)) {
+        // Offset = Offset + EltOffset for index into a struct
+        llvm::ConstantInt *OpC = llvm::cast<llvm::ConstantInt>(GTI.getOperand());
+        const llvm::StructLayout *SL = DL.getStructLayout(STy);
+        unsigned EltOffset = SL->getElementOffset(OpC->getZExtValue());
+        Incr = llvm::ConstantInt::get(Int32Ty, EltOffset);
+      } else {
+        // Offset = Offset + Index * EltSize for index into an array or a vector
+        llvm::Value *EltSize = llvm::ConstantInt::get(
+            Int32Ty, DL.getTypeAllocSize(GTI.getIndexedType()));
+        Incr = llvm::BinaryOperator::CreateMul(
+            normalizeIndex(GTI.getOperand(), GEP), EltSize, "", GEP);
+      }
+      Offset = incrementOffset(Offset, Incr, GEP);
+    }
+
+    return Offset;
+  }
+
+  // Replace all uses of GEP with an equivalent int8* GEP at a byte offset.
+  // Returns false, leaving the function untouched, if GEP indexes a struct
+  // with a non-constant operand. The caller erases GEP on success.
+  bool translateGEP(llvm::GetElementPtrInst *GEP) {
+    if (!structIndicesAreConstant(GEP)) {
+      ALOGE("Operand for struct type is not constant!");
+      bccAssert(false);
+      // NOTE(review): assumes bccAssert may not abort in every build (confirm
+      // in bcc/Assert.h); skip rather than dereference a null ConstantInt.
+      return false;
+    }
+
+    // cast GEP pointer operand to int8*
+    llvm::CastInst *Int8Ptr = llvm::CastInst::CreatePointerCast(
+        GEP->getPointerOperand(), llvm::Type::getInt8PtrTy(*Context),
+        "to.int8ptr", GEP);
+    llvm::Value *Indices[1] = {computeGEPOffset(GEP)};
+
+    // index into the int8* based on the byte offset
+    llvm::GetElementPtrInst *Int8PtrGEP = llvm::GetElementPtrInst::Create(
+        llvm::Type::getInt8Ty(*Context), Int8Ptr, llvm::makeArrayRef(Indices),
+        "int8ptr.indexed", GEP);
+    Int8PtrGEP->setIsInBounds(GEP->isInBounds());
+
+    // cast the indexed int8* back to the type of the original GEP
+    llvm::CastInst *OutCast = llvm::CastInst::CreatePointerCast(
+        Int8PtrGEP, GEP->getType(), "to.orig.geptype", GEP);
+
+    GEP->replaceAllUsesWith(OutCast);
+    return true;
+  }
+
+public:
+  RSX86TranslateGEPPass()
+    : FunctionPass (ID), DL(X86_CUSTOM_DL_STRING) {
+  }
+
+  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
+    // This pass is run in isolation in a separate pass manager. So setting
+    // AnalysisUsage is unnecessary. Set just for completeness.
+    AU.setPreservesCFG();
+  }
+
+  virtual bool runOnFunction(llvm::Function &F) override {
+    bool changed = false;
+    Context = &F.getParent()->getContext();
+
+    // To avoid updating/deleting instructions while walking a BasicBlock's
+    // instructions, collect the GEPs that need to be translated and process
+    // them subsequently.
+    std::vector<llvm::GetElementPtrInst *> GEPsToHandle;
+
+    for (auto &BB: F) {
+      for (auto &I: BB) {
+        if (auto *GEP = llvm::dyn_cast<llvm::GetElementPtrInst>(&I)) {
+          if (GEPIndexesStructType(GEP)) {
+            GEPsToHandle.push_back(GEP);
+          }
+        }
+      }
+    }
+
+    for (auto *GEP: GEPsToHandle) {
+      // Translate GEPs and erase the ones that were replaced
+      if (translateGEP(GEP)) {
+        changed = true;
+        GEP->eraseFromParent();
+      }
+    }
+
+    return changed;
+  }
+
+  virtual const char *getPassName() const override {
+    return "Translate GEPs on structs, intended for x86 target";
+  }
+};
+
+}
+
+char RSX86TranslateGEPPass::ID = 0;
+
+namespace bcc {
+
+llvm::FunctionPass *
+createRSX86TranslateGEPPass() {
+  return new RSX86TranslateGEPPass();
+}
+
+}