Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 1 | //=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // \file |
| 9 | // |
// This pass binds printf calls to a kernel argument pointer that will be bound
// to a buffer later by the runtime.
| 12 | // |
| 13 | // This pass traverses the functions in the module and converts |
| 14 | // each call to printf to a sequence of operations that |
| 15 | // store the following into the printf buffer: |
| 16 | // - format string (passed as a module's metadata unique ID) |
| 17 | // - bitwise copies of printf arguments |
| 18 | // The backend passes will need to store metadata in the kernel |
| 19 | //===----------------------------------------------------------------------===// |
| 20 | |
| 21 | #include "AMDGPU.h" |
| 22 | #include "llvm/ADT/SmallString.h" |
| 23 | #include "llvm/ADT/StringExtras.h" |
| 24 | #include "llvm/ADT/Triple.h" |
| 25 | #include "llvm/Analysis/InstructionSimplify.h" |
| 26 | #include "llvm/Analysis/TargetLibraryInfo.h" |
| 27 | #include "llvm/CodeGen/Passes.h" |
| 28 | #include "llvm/IR/Constants.h" |
| 29 | #include "llvm/IR/DataLayout.h" |
| 30 | #include "llvm/IR/Dominators.h" |
| 31 | #include "llvm/IR/GlobalVariable.h" |
| 32 | #include "llvm/IR/IRBuilder.h" |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 33 | #include "llvm/IR/Instructions.h" |
| 34 | #include "llvm/IR/Module.h" |
| 35 | #include "llvm/IR/Type.h" |
Reid Kleckner | 05da2fe | 2019-11-13 13:15:01 -0800 | [diff] [blame^] | 36 | #include "llvm/InitializePasses.h" |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 37 | #include "llvm/Support/CommandLine.h" |
| 38 | #include "llvm/Support/Debug.h" |
| 39 | #include "llvm/Support/raw_ostream.h" |
| 40 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 41 | using namespace llvm; |
| 42 | |
| 43 | #define DEBUG_TYPE "printfToRuntime" |
| 44 | #define DWORD_ALIGN 4 |
| 45 | |
| 46 | namespace { |
| 47 | class LLVM_LIBRARY_VISIBILITY AMDGPUPrintfRuntimeBinding final |
Jay Foad | dafda61 | 2019-10-02 08:44:15 +0000 | [diff] [blame] | 48 | : public ModulePass { |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 49 | |
| 50 | public: |
| 51 | static char ID; |
| 52 | |
| 53 | explicit AMDGPUPrintfRuntimeBinding(); |
| 54 | |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 55 | private: |
| 56 | bool runOnModule(Module &M) override; |
| 57 | void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers, |
| 58 | StringRef fmt, size_t num_ops) const; |
| 59 | |
| 60 | bool shouldPrintAsStr(char Specifier, Type *OpType) const; |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 61 | bool |
| 62 | lowerPrintfForGpu(Module &M, |
| 63 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI); |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 64 | |
| 65 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 66 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
| 67 | AU.addRequired<DominatorTreeWrapperPass>(); |
| 68 | } |
| 69 | |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 70 | Value *simplify(Instruction *I, const TargetLibraryInfo *TLI) { |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 71 | return SimplifyInstruction(I, {*TD, TLI, DT}); |
| 72 | } |
| 73 | |
| 74 | const DataLayout *TD; |
| 75 | const DominatorTree *DT; |
Jay Foad | dafda61 | 2019-10-02 08:44:15 +0000 | [diff] [blame] | 76 | SmallVector<CallInst *, 32> Printfs; |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 77 | }; |
| 78 | } // namespace |
| 79 | |
| 80 | char AMDGPUPrintfRuntimeBinding::ID = 0; |
| 81 | |
| 82 | INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding, |
| 83 | "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering", |
| 84 | false, false) |
| 85 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
| 86 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
| 87 | INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding", |
| 88 | "AMDGPU Printf lowering", false, false) |
| 89 | |
| 90 | char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID; |
| 91 | |
| 92 | namespace llvm { |
| 93 | ModulePass *createAMDGPUPrintfRuntimeBinding() { |
| 94 | return new AMDGPUPrintfRuntimeBinding(); |
| 95 | } |
| 96 | } // namespace llvm |
| 97 | |
| 98 | AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 99 | : ModulePass(ID), TD(nullptr), DT(nullptr) { |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 100 | initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry()); |
| 101 | } |
| 102 | |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 103 | void AMDGPUPrintfRuntimeBinding::getConversionSpecifiers( |
| 104 | SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt, |
| 105 | size_t NumOps) const { |
| 106 | // not all format characters are collected. |
| 107 | // At this time the format characters of interest |
| 108 | // are %p and %s, which use to know if we |
| 109 | // are either storing a literal string or a |
| 110 | // pointer to the printf buffer. |
| 111 | static const char ConvSpecifiers[] = "cdieEfgGaosuxXp"; |
| 112 | size_t CurFmtSpecifierIdx = 0; |
| 113 | size_t PrevFmtSpecifierIdx = 0; |
| 114 | |
| 115 | while ((CurFmtSpecifierIdx = Fmt.find_first_of( |
| 116 | ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) { |
| 117 | bool ArgDump = false; |
| 118 | StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx, |
| 119 | CurFmtSpecifierIdx - PrevFmtSpecifierIdx); |
| 120 | size_t pTag = CurFmt.find_last_of("%"); |
| 121 | if (pTag != StringRef::npos) { |
| 122 | ArgDump = true; |
| 123 | while (pTag && CurFmt[--pTag] == '%') { |
| 124 | ArgDump = !ArgDump; |
| 125 | } |
| 126 | } |
| 127 | |
| 128 | if (ArgDump) |
| 129 | OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]); |
| 130 | |
| 131 | PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx; |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | bool AMDGPUPrintfRuntimeBinding::shouldPrintAsStr(char Specifier, |
| 136 | Type *OpType) const { |
| 137 | if (Specifier != 's') |
| 138 | return false; |
| 139 | const PointerType *PT = dyn_cast<PointerType>(OpType); |
| 140 | if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) |
| 141 | return false; |
| 142 | Type *ElemType = PT->getContainedType(0); |
| 143 | if (ElemType->getTypeID() != Type::IntegerTyID) |
| 144 | return false; |
| 145 | IntegerType *ElemIType = cast<IntegerType>(ElemType); |
| 146 | return ElemIType->getBitWidth() == 8; |
| 147 | } |
| 148 | |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 149 | bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( |
| 150 | Module &M, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 151 | LLVMContext &Ctx = M.getContext(); |
| 152 | IRBuilder<> Builder(Ctx); |
| 153 | Type *I32Ty = Type::getInt32Ty(Ctx); |
| 154 | unsigned UniqID = 0; |
| 155 | // NB: This is important for this string size to be divizable by 4 |
| 156 | const char NonLiteralStr[4] = "???"; |
| 157 | |
Jay Foad | dafda61 | 2019-10-02 08:44:15 +0000 | [diff] [blame] | 158 | for (auto CI : Printfs) { |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 159 | unsigned NumOps = CI->getNumArgOperands(); |
| 160 | |
| 161 | SmallString<16> OpConvSpecifiers; |
| 162 | Value *Op = CI->getArgOperand(0); |
| 163 | |
| 164 | if (auto LI = dyn_cast<LoadInst>(Op)) { |
| 165 | Op = LI->getPointerOperand(); |
| 166 | for (auto Use : Op->users()) { |
| 167 | if (auto SI = dyn_cast<StoreInst>(Use)) { |
| 168 | Op = SI->getValueOperand(); |
| 169 | break; |
| 170 | } |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | if (auto I = dyn_cast<Instruction>(Op)) { |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 175 | Value *Op_simplified = simplify(I, &GetTLI(*I->getFunction())); |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 176 | if (Op_simplified) |
| 177 | Op = Op_simplified; |
| 178 | } |
| 179 | |
| 180 | ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Op); |
| 181 | |
| 182 | if (ConstExpr) { |
| 183 | GlobalVariable *GVar = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); |
| 184 | |
| 185 | StringRef Str("unknown"); |
| 186 | if (GVar && GVar->hasInitializer()) { |
| 187 | auto Init = GVar->getInitializer(); |
| 188 | if (auto CA = dyn_cast<ConstantDataArray>(Init)) { |
| 189 | if (CA->isString()) |
| 190 | Str = CA->getAsCString(); |
| 191 | } else if (isa<ConstantAggregateZero>(Init)) { |
| 192 | Str = ""; |
| 193 | } |
| 194 | // |
| 195 | // we need this call to ascertain |
| 196 | // that we are printing a string |
| 197 | // or a pointer. It takes out the |
| 198 | // specifiers and fills up the first |
| 199 | // arg |
| 200 | getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1); |
| 201 | } |
| 202 | // Add metadata for the string |
| 203 | std::string AStreamHolder; |
| 204 | raw_string_ostream Sizes(AStreamHolder); |
| 205 | int Sum = DWORD_ALIGN; |
| 206 | Sizes << CI->getNumArgOperands() - 1; |
| 207 | Sizes << ':'; |
| 208 | for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && |
| 209 | ArgCount <= OpConvSpecifiers.size(); |
| 210 | ArgCount++) { |
| 211 | Value *Arg = CI->getArgOperand(ArgCount); |
| 212 | Type *ArgType = Arg->getType(); |
| 213 | unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType); |
| 214 | ArgSize = ArgSize / 8; |
| 215 | // |
| 216 | // ArgSize by design should be a multiple of DWORD_ALIGN, |
| 217 | // expand the arguments that do not follow this rule. |
| 218 | // |
| 219 | if (ArgSize % DWORD_ALIGN != 0) { |
| 220 | llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx); |
| 221 | VectorType *LLVMVecType = llvm::dyn_cast<llvm::VectorType>(ArgType); |
| 222 | int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1; |
| 223 | if (LLVMVecType && NumElem > 1) |
| 224 | ResType = llvm::VectorType::get(ResType, NumElem); |
| 225 | Builder.SetInsertPoint(CI); |
| 226 | Builder.SetCurrentDebugLocation(CI->getDebugLoc()); |
| 227 | if (OpConvSpecifiers[ArgCount - 1] == 'x' || |
| 228 | OpConvSpecifiers[ArgCount - 1] == 'X' || |
| 229 | OpConvSpecifiers[ArgCount - 1] == 'u' || |
| 230 | OpConvSpecifiers[ArgCount - 1] == 'o') |
| 231 | Arg = Builder.CreateZExt(Arg, ResType); |
| 232 | else |
| 233 | Arg = Builder.CreateSExt(Arg, ResType); |
| 234 | ArgType = Arg->getType(); |
| 235 | ArgSize = TD->getTypeAllocSizeInBits(ArgType); |
| 236 | ArgSize = ArgSize / 8; |
| 237 | CI->setOperand(ArgCount, Arg); |
| 238 | } |
| 239 | if (OpConvSpecifiers[ArgCount - 1] == 'f') { |
| 240 | ConstantFP *FpCons = dyn_cast<ConstantFP>(Arg); |
| 241 | if (FpCons) |
| 242 | ArgSize = 4; |
| 243 | else { |
| 244 | FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg); |
| 245 | if (FpExt && FpExt->getType()->isDoubleTy() && |
| 246 | FpExt->getOperand(0)->getType()->isFloatTy()) |
| 247 | ArgSize = 4; |
| 248 | } |
| 249 | } |
| 250 | if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { |
| 251 | if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) { |
| 252 | GlobalVariable *GV = |
| 253 | dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); |
| 254 | if (GV && GV->hasInitializer()) { |
| 255 | Constant *Init = GV->getInitializer(); |
| 256 | ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init); |
| 257 | if (Init->isZeroValue() || CA->isString()) { |
| 258 | size_t SizeStr = Init->isZeroValue() |
| 259 | ? 1 |
| 260 | : (strlen(CA->getAsCString().data()) + 1); |
| 261 | size_t Rem = SizeStr % DWORD_ALIGN; |
| 262 | size_t NSizeStr = 0; |
| 263 | LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr |
| 264 | << '\n'); |
| 265 | if (Rem) { |
| 266 | NSizeStr = SizeStr + (DWORD_ALIGN - Rem); |
| 267 | } else { |
| 268 | NSizeStr = SizeStr; |
| 269 | } |
| 270 | ArgSize = NSizeStr; |
| 271 | } |
| 272 | } else { |
| 273 | ArgSize = sizeof(NonLiteralStr); |
| 274 | } |
| 275 | } else { |
| 276 | ArgSize = sizeof(NonLiteralStr); |
| 277 | } |
| 278 | } |
| 279 | LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize |
| 280 | << " for type: " << *ArgType << '\n'); |
| 281 | Sizes << ArgSize << ':'; |
| 282 | Sum += ArgSize; |
| 283 | } |
| 284 | LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str() |
| 285 | << '\n'); |
| 286 | for (size_t I = 0; I < Str.size(); ++I) { |
| 287 | // Rest of the C escape sequences (e.g. \') are handled correctly |
| 288 | // by the MDParser |
| 289 | switch (Str[I]) { |
| 290 | case '\a': |
| 291 | Sizes << "\\a"; |
| 292 | break; |
| 293 | case '\b': |
| 294 | Sizes << "\\b"; |
| 295 | break; |
| 296 | case '\f': |
| 297 | Sizes << "\\f"; |
| 298 | break; |
| 299 | case '\n': |
| 300 | Sizes << "\\n"; |
| 301 | break; |
| 302 | case '\r': |
| 303 | Sizes << "\\r"; |
| 304 | break; |
| 305 | case '\v': |
| 306 | Sizes << "\\v"; |
| 307 | break; |
| 308 | case ':': |
| 309 | // ':' cannot be scanned by Flex, as it is defined as a delimiter |
| 310 | // Replace it with it's octal representation \72 |
| 311 | Sizes << "\\72"; |
| 312 | break; |
| 313 | default: |
| 314 | Sizes << Str[I]; |
| 315 | break; |
| 316 | } |
| 317 | } |
| 318 | |
| 319 | // Insert the printf_alloc call |
| 320 | Builder.SetInsertPoint(CI); |
| 321 | Builder.SetCurrentDebugLocation(CI->getDebugLoc()); |
| 322 | |
| 323 | AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex, |
| 324 | Attribute::NoUnwind); |
| 325 | |
| 326 | Type *SizetTy = Type::getInt32Ty(Ctx); |
| 327 | |
| 328 | Type *Tys_alloc[1] = {SizetTy}; |
| 329 | Type *I8Ptr = PointerType::get(Type::getInt8Ty(Ctx), 1); |
| 330 | FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false); |
| 331 | FunctionCallee PrintfAllocFn = |
| 332 | M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr); |
| 333 | |
| 334 | LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n'); |
| 335 | std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str(); |
| 336 | MDString *fmtStrArray = MDString::get(Ctx, fmtstr); |
| 337 | |
| 338 | // Instead of creating global variables, the |
| 339 | // printf format strings are extracted |
| 340 | // and passed as metadata. This avoids |
| 341 | // polluting llvm's symbol tables in this module. |
| 342 | // Metadata is going to be extracted |
| 343 | // by the backend passes and inserted |
| 344 | // into the OpenCL binary as appropriate. |
| 345 | StringRef amd("llvm.printf.fmts"); |
| 346 | NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd); |
| 347 | MDNode *myMD = MDNode::get(Ctx, fmtStrArray); |
| 348 | metaD->addOperand(myMD); |
| 349 | Value *sumC = ConstantInt::get(SizetTy, Sum, false); |
| 350 | SmallVector<Value *, 1> alloc_args; |
| 351 | alloc_args.push_back(sumC); |
| 352 | CallInst *pcall = |
| 353 | CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI); |
| 354 | |
| 355 | // |
| 356 | // Insert code to split basicblock with a |
| 357 | // piece of hammock code. |
| 358 | // basicblock splits after buffer overflow check |
| 359 | // |
| 360 | ConstantPointerNull *zeroIntPtr = |
| 361 | ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(Ctx), 1)); |
| 362 | ICmpInst *cmp = |
| 363 | dyn_cast<ICmpInst>(Builder.CreateICmpNE(pcall, zeroIntPtr, "")); |
| 364 | if (!CI->use_empty()) { |
| 365 | Value *result = |
| 366 | Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res"); |
| 367 | CI->replaceAllUsesWith(result); |
| 368 | } |
| 369 | SplitBlock(CI->getParent(), cmp); |
| 370 | Instruction *Brnch = |
| 371 | SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false); |
| 372 | |
| 373 | Builder.SetInsertPoint(Brnch); |
| 374 | |
| 375 | // store unique printf id in the buffer |
| 376 | // |
| 377 | SmallVector<Value *, 1> ZeroIdxList; |
| 378 | ConstantInt *zeroInt = |
| 379 | ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10)); |
| 380 | ZeroIdxList.push_back(zeroInt); |
| 381 | |
| 382 | GetElementPtrInst *BufferIdx = |
| 383 | dyn_cast<GetElementPtrInst>(GetElementPtrInst::Create( |
| 384 | nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch)); |
| 385 | |
| 386 | Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS); |
| 387 | Value *id_gep_cast = |
| 388 | new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch); |
| 389 | |
| 390 | StoreInst *stbuff = |
| 391 | new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast); |
| 392 | stbuff->insertBefore(Brnch); // to Remove unused variable warning |
| 393 | |
| 394 | SmallVector<Value *, 2> FourthIdxList; |
| 395 | ConstantInt *fourInt = |
| 396 | ConstantInt::get(Ctx, APInt(32, StringRef("4"), 10)); |
| 397 | |
| 398 | FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id |
| 399 | // the following GEP is the buffer pointer |
| 400 | BufferIdx = cast<GetElementPtrInst>(GetElementPtrInst::Create( |
| 401 | nullptr, pcall, FourthIdxList, "PrintBuffGep", Brnch)); |
| 402 | |
| 403 | Type *Int32Ty = Type::getInt32Ty(Ctx); |
| 404 | Type *Int64Ty = Type::getInt64Ty(Ctx); |
| 405 | for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && |
| 406 | ArgCount <= OpConvSpecifiers.size(); |
| 407 | ArgCount++) { |
| 408 | Value *Arg = CI->getArgOperand(ArgCount); |
| 409 | Type *ArgType = Arg->getType(); |
| 410 | SmallVector<Value *, 32> WhatToStore; |
| 411 | if (ArgType->isFPOrFPVectorTy() && |
| 412 | (ArgType->getTypeID() != Type::VectorTyID)) { |
| 413 | Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty; |
| 414 | if (OpConvSpecifiers[ArgCount - 1] == 'f') { |
| 415 | ConstantFP *fpCons = dyn_cast<ConstantFP>(Arg); |
| 416 | if (fpCons) { |
| 417 | APFloat Val(fpCons->getValueAPF()); |
| 418 | bool Lost = false; |
| 419 | Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, |
| 420 | &Lost); |
| 421 | Arg = ConstantFP::get(Ctx, Val); |
| 422 | IType = Int32Ty; |
| 423 | } else { |
| 424 | FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg); |
| 425 | if (FpExt && FpExt->getType()->isDoubleTy() && |
| 426 | FpExt->getOperand(0)->getType()->isFloatTy()) { |
| 427 | Arg = FpExt->getOperand(0); |
| 428 | IType = Int32Ty; |
| 429 | } |
| 430 | } |
| 431 | } |
| 432 | Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch); |
| 433 | WhatToStore.push_back(Arg); |
| 434 | } else if (ArgType->getTypeID() == Type::PointerTyID) { |
| 435 | if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { |
| 436 | const char *S = NonLiteralStr; |
| 437 | if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) { |
| 438 | GlobalVariable *GV = |
| 439 | dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); |
| 440 | if (GV && GV->hasInitializer()) { |
| 441 | Constant *Init = GV->getInitializer(); |
| 442 | ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init); |
| 443 | if (Init->isZeroValue() || CA->isString()) { |
| 444 | S = Init->isZeroValue() ? "" : CA->getAsCString().data(); |
| 445 | } |
| 446 | } |
| 447 | } |
| 448 | size_t SizeStr = strlen(S) + 1; |
| 449 | size_t Rem = SizeStr % DWORD_ALIGN; |
| 450 | size_t NSizeStr = 0; |
| 451 | if (Rem) { |
| 452 | NSizeStr = SizeStr + (DWORD_ALIGN - Rem); |
| 453 | } else { |
| 454 | NSizeStr = SizeStr; |
| 455 | } |
| 456 | if (S[0]) { |
| 457 | char *MyNewStr = new char[NSizeStr](); |
| 458 | strcpy(MyNewStr, S); |
| 459 | int NumInts = NSizeStr / 4; |
| 460 | int CharC = 0; |
| 461 | while (NumInts) { |
| 462 | int ANum = *(int *)(MyNewStr + CharC); |
| 463 | CharC += 4; |
| 464 | NumInts--; |
| 465 | Value *ANumV = ConstantInt::get(Int32Ty, ANum, false); |
| 466 | WhatToStore.push_back(ANumV); |
| 467 | } |
| 468 | delete[] MyNewStr; |
| 469 | } else { |
| 470 | // Empty string, give a hint to RT it is no NULL |
| 471 | Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false); |
| 472 | WhatToStore.push_back(ANumV); |
| 473 | } |
| 474 | } else { |
| 475 | uint64_t Size = TD->getTypeAllocSizeInBits(ArgType); |
| 476 | assert((Size == 32 || Size == 64) && "unsupported size"); |
| 477 | Type *DstType = (Size == 32) ? Int32Ty : Int64Ty; |
| 478 | Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch); |
| 479 | WhatToStore.push_back(Arg); |
| 480 | } |
| 481 | } else if (ArgType->getTypeID() == Type::VectorTyID) { |
| 482 | Type *IType = NULL; |
| 483 | uint32_t EleCount = cast<VectorType>(ArgType)->getNumElements(); |
| 484 | uint32_t EleSize = ArgType->getScalarSizeInBits(); |
| 485 | uint32_t TotalSize = EleCount * EleSize; |
| 486 | if (EleCount == 3) { |
| 487 | IntegerType *Int32Ty = Type::getInt32Ty(ArgType->getContext()); |
| 488 | Constant *Indices[4] = { |
| 489 | ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 1), |
| 490 | ConstantInt::get(Int32Ty, 2), ConstantInt::get(Int32Ty, 2)}; |
| 491 | Constant *Mask = ConstantVector::get(Indices); |
| 492 | ShuffleVectorInst *Shuffle = new ShuffleVectorInst(Arg, Arg, Mask); |
| 493 | Shuffle->insertBefore(Brnch); |
| 494 | Arg = Shuffle; |
| 495 | ArgType = Arg->getType(); |
| 496 | TotalSize += EleSize; |
| 497 | } |
| 498 | switch (EleSize) { |
| 499 | default: |
| 500 | EleCount = TotalSize / 64; |
| 501 | IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext())); |
| 502 | break; |
| 503 | case 8: |
| 504 | if (EleCount >= 8) { |
| 505 | EleCount = TotalSize / 64; |
| 506 | IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext())); |
| 507 | } else if (EleCount >= 3) { |
| 508 | EleCount = 1; |
| 509 | IType = dyn_cast<Type>(Type::getInt32Ty(ArgType->getContext())); |
| 510 | } else { |
| 511 | EleCount = 1; |
| 512 | IType = dyn_cast<Type>(Type::getInt16Ty(ArgType->getContext())); |
| 513 | } |
| 514 | break; |
| 515 | case 16: |
| 516 | if (EleCount >= 3) { |
| 517 | EleCount = TotalSize / 64; |
| 518 | IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext())); |
| 519 | } else { |
| 520 | EleCount = 1; |
| 521 | IType = dyn_cast<Type>(Type::getInt32Ty(ArgType->getContext())); |
| 522 | } |
| 523 | break; |
| 524 | } |
| 525 | if (EleCount > 1) { |
| 526 | IType = dyn_cast<Type>(VectorType::get(IType, EleCount)); |
| 527 | } |
| 528 | Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch); |
| 529 | WhatToStore.push_back(Arg); |
| 530 | } else { |
| 531 | WhatToStore.push_back(Arg); |
| 532 | } |
Stanislav Mekhanoshin | 438315b | 2019-08-13 01:07:27 +0000 | [diff] [blame] | 533 | for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) { |
| 534 | Value *TheBtCast = WhatToStore[I]; |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 535 | unsigned ArgSize = |
| 536 | TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8; |
| 537 | SmallVector<Value *, 1> BuffOffset; |
| 538 | BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize)); |
| 539 | |
| 540 | Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1); |
| 541 | Value *CastedGEP = |
| 542 | new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch); |
| 543 | StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch); |
| 544 | LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n" |
| 545 | << *StBuff << '\n'); |
| 546 | (void)StBuff; |
Stanislav Mekhanoshin | 438315b | 2019-08-13 01:07:27 +0000 | [diff] [blame] | 547 | if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands()) |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 548 | break; |
| 549 | BufferIdx = dyn_cast<GetElementPtrInst>(GetElementPtrInst::Create( |
| 550 | nullptr, BufferIdx, BuffOffset, "PrintBuffNextPtr", Brnch)); |
| 551 | LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n" |
| 552 | << *BufferIdx << '\n'); |
| 553 | } |
| 554 | } |
| 555 | } |
| 556 | } |
| 557 | |
| 558 | // erase the printf calls |
Jay Foad | dafda61 | 2019-10-02 08:44:15 +0000 | [diff] [blame] | 559 | for (auto CI : Printfs) |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 560 | CI->eraseFromParent(); |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 561 | |
| 562 | Printfs.clear(); |
| 563 | return true; |
| 564 | } |
| 565 | |
| 566 | bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) { |
| 567 | Triple TT(M.getTargetTriple()); |
| 568 | if (TT.getArch() == Triple::r600) |
| 569 | return false; |
| 570 | |
Jay Foad | dafda61 | 2019-10-02 08:44:15 +0000 | [diff] [blame] | 571 | auto PrintfFunction = M.getFunction("printf"); |
| 572 | if (!PrintfFunction) |
| 573 | return false; |
| 574 | |
| 575 | for (auto &U : PrintfFunction->uses()) { |
| 576 | if (auto *CI = dyn_cast<CallInst>(U.getUser())) { |
| 577 | if (CI->isCallee(&U)) |
| 578 | Printfs.push_back(CI); |
| 579 | } |
| 580 | } |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 581 | |
| 582 | if (Printfs.empty()) |
| 583 | return false; |
| 584 | |
| 585 | TD = &M.getDataLayout(); |
| 586 | auto DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); |
| 587 | DT = DTWP ? &DTWP->getDomTree() : nullptr; |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 588 | auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { |
| 589 | return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
| 590 | }; |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 591 | |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 592 | return lowerPrintfForGpu(M, GetTLI); |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 593 | } |