Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 1 | //=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // \file |
| 9 | // |
// This pass binds printf calls to a kernel argument pointer that will be
// bound to a buffer later by the runtime.
| 12 | // |
| 13 | // This pass traverses the functions in the module and converts |
| 14 | // each call to printf to a sequence of operations that |
| 15 | // store the following into the printf buffer: |
| 16 | // - format string (passed as a module's metadata unique ID) |
| 17 | // - bitwise copies of printf arguments |
| 18 | // The backend passes will need to store metadata in the kernel |
| 19 | //===----------------------------------------------------------------------===// |
| 20 | |
| 21 | #include "AMDGPU.h" |
| 22 | #include "llvm/ADT/SmallString.h" |
| 23 | #include "llvm/ADT/StringExtras.h" |
| 24 | #include "llvm/ADT/Triple.h" |
| 25 | #include "llvm/Analysis/InstructionSimplify.h" |
| 26 | #include "llvm/Analysis/TargetLibraryInfo.h" |
| 27 | #include "llvm/CodeGen/Passes.h" |
| 28 | #include "llvm/IR/Constants.h" |
| 29 | #include "llvm/IR/DataLayout.h" |
| 30 | #include "llvm/IR/Dominators.h" |
| 31 | #include "llvm/IR/GlobalVariable.h" |
| 32 | #include "llvm/IR/IRBuilder.h" |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 33 | #include "llvm/IR/Instructions.h" |
| 34 | #include "llvm/IR/Module.h" |
| 35 | #include "llvm/IR/Type.h" |
| 36 | #include "llvm/Support/CommandLine.h" |
| 37 | #include "llvm/Support/Debug.h" |
| 38 | #include "llvm/Support/raw_ostream.h" |
| 39 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 40 | using namespace llvm; |
| 41 | |
| 42 | #define DEBUG_TYPE "printfToRuntime" |
| 43 | #define DWORD_ALIGN 4 |
| 44 | |
| 45 | namespace { |
| 46 | class LLVM_LIBRARY_VISIBILITY AMDGPUPrintfRuntimeBinding final |
Jay Foad | dafda61 | 2019-10-02 08:44:15 +0000 | [diff] [blame^] | 47 | : public ModulePass { |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 48 | |
| 49 | public: |
| 50 | static char ID; |
| 51 | |
| 52 | explicit AMDGPUPrintfRuntimeBinding(); |
| 53 | |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 54 | private: |
| 55 | bool runOnModule(Module &M) override; |
| 56 | void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers, |
| 57 | StringRef fmt, size_t num_ops) const; |
| 58 | |
| 59 | bool shouldPrintAsStr(char Specifier, Type *OpType) const; |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 60 | bool |
| 61 | lowerPrintfForGpu(Module &M, |
| 62 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI); |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 63 | |
| 64 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 65 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
| 66 | AU.addRequired<DominatorTreeWrapperPass>(); |
| 67 | } |
| 68 | |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 69 | Value *simplify(Instruction *I, const TargetLibraryInfo *TLI) { |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 70 | return SimplifyInstruction(I, {*TD, TLI, DT}); |
| 71 | } |
| 72 | |
| 73 | const DataLayout *TD; |
| 74 | const DominatorTree *DT; |
Jay Foad | dafda61 | 2019-10-02 08:44:15 +0000 | [diff] [blame^] | 75 | SmallVector<CallInst *, 32> Printfs; |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 76 | }; |
| 77 | } // namespace |
| 78 | |
| 79 | char AMDGPUPrintfRuntimeBinding::ID = 0; |
| 80 | |
| 81 | INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding, |
| 82 | "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering", |
| 83 | false, false) |
| 84 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
| 85 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
| 86 | INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding", |
| 87 | "AMDGPU Printf lowering", false, false) |
| 88 | |
| 89 | char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID; |
| 90 | |
| 91 | namespace llvm { |
| 92 | ModulePass *createAMDGPUPrintfRuntimeBinding() { |
| 93 | return new AMDGPUPrintfRuntimeBinding(); |
| 94 | } |
| 95 | } // namespace llvm |
| 96 | |
| 97 | AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 98 | : ModulePass(ID), TD(nullptr), DT(nullptr) { |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 99 | initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry()); |
| 100 | } |
| 101 | |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 102 | void AMDGPUPrintfRuntimeBinding::getConversionSpecifiers( |
| 103 | SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt, |
| 104 | size_t NumOps) const { |
| 105 | // not all format characters are collected. |
| 106 | // At this time the format characters of interest |
| 107 | // are %p and %s, which use to know if we |
| 108 | // are either storing a literal string or a |
| 109 | // pointer to the printf buffer. |
| 110 | static const char ConvSpecifiers[] = "cdieEfgGaosuxXp"; |
| 111 | size_t CurFmtSpecifierIdx = 0; |
| 112 | size_t PrevFmtSpecifierIdx = 0; |
| 113 | |
| 114 | while ((CurFmtSpecifierIdx = Fmt.find_first_of( |
| 115 | ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) { |
| 116 | bool ArgDump = false; |
| 117 | StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx, |
| 118 | CurFmtSpecifierIdx - PrevFmtSpecifierIdx); |
| 119 | size_t pTag = CurFmt.find_last_of("%"); |
| 120 | if (pTag != StringRef::npos) { |
| 121 | ArgDump = true; |
| 122 | while (pTag && CurFmt[--pTag] == '%') { |
| 123 | ArgDump = !ArgDump; |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | if (ArgDump) |
| 128 | OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]); |
| 129 | |
| 130 | PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx; |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | bool AMDGPUPrintfRuntimeBinding::shouldPrintAsStr(char Specifier, |
| 135 | Type *OpType) const { |
| 136 | if (Specifier != 's') |
| 137 | return false; |
| 138 | const PointerType *PT = dyn_cast<PointerType>(OpType); |
| 139 | if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) |
| 140 | return false; |
| 141 | Type *ElemType = PT->getContainedType(0); |
| 142 | if (ElemType->getTypeID() != Type::IntegerTyID) |
| 143 | return false; |
| 144 | IntegerType *ElemIType = cast<IntegerType>(ElemType); |
| 145 | return ElemIType->getBitWidth() == 8; |
| 146 | } |
| 147 | |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 148 | bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( |
| 149 | Module &M, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 150 | LLVMContext &Ctx = M.getContext(); |
| 151 | IRBuilder<> Builder(Ctx); |
| 152 | Type *I32Ty = Type::getInt32Ty(Ctx); |
| 153 | unsigned UniqID = 0; |
| 154 | // NB: This is important for this string size to be divizable by 4 |
| 155 | const char NonLiteralStr[4] = "???"; |
| 156 | |
Jay Foad | dafda61 | 2019-10-02 08:44:15 +0000 | [diff] [blame^] | 157 | for (auto CI : Printfs) { |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 158 | unsigned NumOps = CI->getNumArgOperands(); |
| 159 | |
| 160 | SmallString<16> OpConvSpecifiers; |
| 161 | Value *Op = CI->getArgOperand(0); |
| 162 | |
| 163 | if (auto LI = dyn_cast<LoadInst>(Op)) { |
| 164 | Op = LI->getPointerOperand(); |
| 165 | for (auto Use : Op->users()) { |
| 166 | if (auto SI = dyn_cast<StoreInst>(Use)) { |
| 167 | Op = SI->getValueOperand(); |
| 168 | break; |
| 169 | } |
| 170 | } |
| 171 | } |
| 172 | |
| 173 | if (auto I = dyn_cast<Instruction>(Op)) { |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 174 | Value *Op_simplified = simplify(I, &GetTLI(*I->getFunction())); |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 175 | if (Op_simplified) |
| 176 | Op = Op_simplified; |
| 177 | } |
| 178 | |
| 179 | ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Op); |
| 180 | |
| 181 | if (ConstExpr) { |
| 182 | GlobalVariable *GVar = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); |
| 183 | |
| 184 | StringRef Str("unknown"); |
| 185 | if (GVar && GVar->hasInitializer()) { |
| 186 | auto Init = GVar->getInitializer(); |
| 187 | if (auto CA = dyn_cast<ConstantDataArray>(Init)) { |
| 188 | if (CA->isString()) |
| 189 | Str = CA->getAsCString(); |
| 190 | } else if (isa<ConstantAggregateZero>(Init)) { |
| 191 | Str = ""; |
| 192 | } |
| 193 | // |
| 194 | // we need this call to ascertain |
| 195 | // that we are printing a string |
| 196 | // or a pointer. It takes out the |
| 197 | // specifiers and fills up the first |
| 198 | // arg |
| 199 | getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1); |
| 200 | } |
| 201 | // Add metadata for the string |
| 202 | std::string AStreamHolder; |
| 203 | raw_string_ostream Sizes(AStreamHolder); |
| 204 | int Sum = DWORD_ALIGN; |
| 205 | Sizes << CI->getNumArgOperands() - 1; |
| 206 | Sizes << ':'; |
| 207 | for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && |
| 208 | ArgCount <= OpConvSpecifiers.size(); |
| 209 | ArgCount++) { |
| 210 | Value *Arg = CI->getArgOperand(ArgCount); |
| 211 | Type *ArgType = Arg->getType(); |
| 212 | unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType); |
| 213 | ArgSize = ArgSize / 8; |
| 214 | // |
| 215 | // ArgSize by design should be a multiple of DWORD_ALIGN, |
| 216 | // expand the arguments that do not follow this rule. |
| 217 | // |
| 218 | if (ArgSize % DWORD_ALIGN != 0) { |
| 219 | llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx); |
| 220 | VectorType *LLVMVecType = llvm::dyn_cast<llvm::VectorType>(ArgType); |
| 221 | int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1; |
| 222 | if (LLVMVecType && NumElem > 1) |
| 223 | ResType = llvm::VectorType::get(ResType, NumElem); |
| 224 | Builder.SetInsertPoint(CI); |
| 225 | Builder.SetCurrentDebugLocation(CI->getDebugLoc()); |
| 226 | if (OpConvSpecifiers[ArgCount - 1] == 'x' || |
| 227 | OpConvSpecifiers[ArgCount - 1] == 'X' || |
| 228 | OpConvSpecifiers[ArgCount - 1] == 'u' || |
| 229 | OpConvSpecifiers[ArgCount - 1] == 'o') |
| 230 | Arg = Builder.CreateZExt(Arg, ResType); |
| 231 | else |
| 232 | Arg = Builder.CreateSExt(Arg, ResType); |
| 233 | ArgType = Arg->getType(); |
| 234 | ArgSize = TD->getTypeAllocSizeInBits(ArgType); |
| 235 | ArgSize = ArgSize / 8; |
| 236 | CI->setOperand(ArgCount, Arg); |
| 237 | } |
| 238 | if (OpConvSpecifiers[ArgCount - 1] == 'f') { |
| 239 | ConstantFP *FpCons = dyn_cast<ConstantFP>(Arg); |
| 240 | if (FpCons) |
| 241 | ArgSize = 4; |
| 242 | else { |
| 243 | FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg); |
| 244 | if (FpExt && FpExt->getType()->isDoubleTy() && |
| 245 | FpExt->getOperand(0)->getType()->isFloatTy()) |
| 246 | ArgSize = 4; |
| 247 | } |
| 248 | } |
| 249 | if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { |
| 250 | if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) { |
| 251 | GlobalVariable *GV = |
| 252 | dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); |
| 253 | if (GV && GV->hasInitializer()) { |
| 254 | Constant *Init = GV->getInitializer(); |
| 255 | ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init); |
| 256 | if (Init->isZeroValue() || CA->isString()) { |
| 257 | size_t SizeStr = Init->isZeroValue() |
| 258 | ? 1 |
| 259 | : (strlen(CA->getAsCString().data()) + 1); |
| 260 | size_t Rem = SizeStr % DWORD_ALIGN; |
| 261 | size_t NSizeStr = 0; |
| 262 | LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr |
| 263 | << '\n'); |
| 264 | if (Rem) { |
| 265 | NSizeStr = SizeStr + (DWORD_ALIGN - Rem); |
| 266 | } else { |
| 267 | NSizeStr = SizeStr; |
| 268 | } |
| 269 | ArgSize = NSizeStr; |
| 270 | } |
| 271 | } else { |
| 272 | ArgSize = sizeof(NonLiteralStr); |
| 273 | } |
| 274 | } else { |
| 275 | ArgSize = sizeof(NonLiteralStr); |
| 276 | } |
| 277 | } |
| 278 | LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize |
| 279 | << " for type: " << *ArgType << '\n'); |
| 280 | Sizes << ArgSize << ':'; |
| 281 | Sum += ArgSize; |
| 282 | } |
| 283 | LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str() |
| 284 | << '\n'); |
| 285 | for (size_t I = 0; I < Str.size(); ++I) { |
| 286 | // Rest of the C escape sequences (e.g. \') are handled correctly |
| 287 | // by the MDParser |
| 288 | switch (Str[I]) { |
| 289 | case '\a': |
| 290 | Sizes << "\\a"; |
| 291 | break; |
| 292 | case '\b': |
| 293 | Sizes << "\\b"; |
| 294 | break; |
| 295 | case '\f': |
| 296 | Sizes << "\\f"; |
| 297 | break; |
| 298 | case '\n': |
| 299 | Sizes << "\\n"; |
| 300 | break; |
| 301 | case '\r': |
| 302 | Sizes << "\\r"; |
| 303 | break; |
| 304 | case '\v': |
| 305 | Sizes << "\\v"; |
| 306 | break; |
| 307 | case ':': |
| 308 | // ':' cannot be scanned by Flex, as it is defined as a delimiter |
| 309 | // Replace it with it's octal representation \72 |
| 310 | Sizes << "\\72"; |
| 311 | break; |
| 312 | default: |
| 313 | Sizes << Str[I]; |
| 314 | break; |
| 315 | } |
| 316 | } |
| 317 | |
| 318 | // Insert the printf_alloc call |
| 319 | Builder.SetInsertPoint(CI); |
| 320 | Builder.SetCurrentDebugLocation(CI->getDebugLoc()); |
| 321 | |
| 322 | AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex, |
| 323 | Attribute::NoUnwind); |
| 324 | |
| 325 | Type *SizetTy = Type::getInt32Ty(Ctx); |
| 326 | |
| 327 | Type *Tys_alloc[1] = {SizetTy}; |
| 328 | Type *I8Ptr = PointerType::get(Type::getInt8Ty(Ctx), 1); |
| 329 | FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false); |
| 330 | FunctionCallee PrintfAllocFn = |
| 331 | M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr); |
| 332 | |
| 333 | LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n'); |
| 334 | std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str(); |
| 335 | MDString *fmtStrArray = MDString::get(Ctx, fmtstr); |
| 336 | |
| 337 | // Instead of creating global variables, the |
| 338 | // printf format strings are extracted |
| 339 | // and passed as metadata. This avoids |
| 340 | // polluting llvm's symbol tables in this module. |
| 341 | // Metadata is going to be extracted |
| 342 | // by the backend passes and inserted |
| 343 | // into the OpenCL binary as appropriate. |
| 344 | StringRef amd("llvm.printf.fmts"); |
| 345 | NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd); |
| 346 | MDNode *myMD = MDNode::get(Ctx, fmtStrArray); |
| 347 | metaD->addOperand(myMD); |
| 348 | Value *sumC = ConstantInt::get(SizetTy, Sum, false); |
| 349 | SmallVector<Value *, 1> alloc_args; |
| 350 | alloc_args.push_back(sumC); |
| 351 | CallInst *pcall = |
| 352 | CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI); |
| 353 | |
| 354 | // |
| 355 | // Insert code to split basicblock with a |
| 356 | // piece of hammock code. |
| 357 | // basicblock splits after buffer overflow check |
| 358 | // |
| 359 | ConstantPointerNull *zeroIntPtr = |
| 360 | ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(Ctx), 1)); |
| 361 | ICmpInst *cmp = |
| 362 | dyn_cast<ICmpInst>(Builder.CreateICmpNE(pcall, zeroIntPtr, "")); |
| 363 | if (!CI->use_empty()) { |
| 364 | Value *result = |
| 365 | Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res"); |
| 366 | CI->replaceAllUsesWith(result); |
| 367 | } |
| 368 | SplitBlock(CI->getParent(), cmp); |
| 369 | Instruction *Brnch = |
| 370 | SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false); |
| 371 | |
| 372 | Builder.SetInsertPoint(Brnch); |
| 373 | |
| 374 | // store unique printf id in the buffer |
| 375 | // |
| 376 | SmallVector<Value *, 1> ZeroIdxList; |
| 377 | ConstantInt *zeroInt = |
| 378 | ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10)); |
| 379 | ZeroIdxList.push_back(zeroInt); |
| 380 | |
| 381 | GetElementPtrInst *BufferIdx = |
| 382 | dyn_cast<GetElementPtrInst>(GetElementPtrInst::Create( |
| 383 | nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch)); |
| 384 | |
| 385 | Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS); |
| 386 | Value *id_gep_cast = |
| 387 | new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch); |
| 388 | |
| 389 | StoreInst *stbuff = |
| 390 | new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast); |
| 391 | stbuff->insertBefore(Brnch); // to Remove unused variable warning |
| 392 | |
| 393 | SmallVector<Value *, 2> FourthIdxList; |
| 394 | ConstantInt *fourInt = |
| 395 | ConstantInt::get(Ctx, APInt(32, StringRef("4"), 10)); |
| 396 | |
| 397 | FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id |
| 398 | // the following GEP is the buffer pointer |
| 399 | BufferIdx = cast<GetElementPtrInst>(GetElementPtrInst::Create( |
| 400 | nullptr, pcall, FourthIdxList, "PrintBuffGep", Brnch)); |
| 401 | |
| 402 | Type *Int32Ty = Type::getInt32Ty(Ctx); |
| 403 | Type *Int64Ty = Type::getInt64Ty(Ctx); |
| 404 | for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && |
| 405 | ArgCount <= OpConvSpecifiers.size(); |
| 406 | ArgCount++) { |
| 407 | Value *Arg = CI->getArgOperand(ArgCount); |
| 408 | Type *ArgType = Arg->getType(); |
| 409 | SmallVector<Value *, 32> WhatToStore; |
| 410 | if (ArgType->isFPOrFPVectorTy() && |
| 411 | (ArgType->getTypeID() != Type::VectorTyID)) { |
| 412 | Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty; |
| 413 | if (OpConvSpecifiers[ArgCount - 1] == 'f') { |
| 414 | ConstantFP *fpCons = dyn_cast<ConstantFP>(Arg); |
| 415 | if (fpCons) { |
| 416 | APFloat Val(fpCons->getValueAPF()); |
| 417 | bool Lost = false; |
| 418 | Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, |
| 419 | &Lost); |
| 420 | Arg = ConstantFP::get(Ctx, Val); |
| 421 | IType = Int32Ty; |
| 422 | } else { |
| 423 | FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg); |
| 424 | if (FpExt && FpExt->getType()->isDoubleTy() && |
| 425 | FpExt->getOperand(0)->getType()->isFloatTy()) { |
| 426 | Arg = FpExt->getOperand(0); |
| 427 | IType = Int32Ty; |
| 428 | } |
| 429 | } |
| 430 | } |
| 431 | Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch); |
| 432 | WhatToStore.push_back(Arg); |
| 433 | } else if (ArgType->getTypeID() == Type::PointerTyID) { |
| 434 | if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { |
| 435 | const char *S = NonLiteralStr; |
| 436 | if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) { |
| 437 | GlobalVariable *GV = |
| 438 | dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); |
| 439 | if (GV && GV->hasInitializer()) { |
| 440 | Constant *Init = GV->getInitializer(); |
| 441 | ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init); |
| 442 | if (Init->isZeroValue() || CA->isString()) { |
| 443 | S = Init->isZeroValue() ? "" : CA->getAsCString().data(); |
| 444 | } |
| 445 | } |
| 446 | } |
| 447 | size_t SizeStr = strlen(S) + 1; |
| 448 | size_t Rem = SizeStr % DWORD_ALIGN; |
| 449 | size_t NSizeStr = 0; |
| 450 | if (Rem) { |
| 451 | NSizeStr = SizeStr + (DWORD_ALIGN - Rem); |
| 452 | } else { |
| 453 | NSizeStr = SizeStr; |
| 454 | } |
| 455 | if (S[0]) { |
| 456 | char *MyNewStr = new char[NSizeStr](); |
| 457 | strcpy(MyNewStr, S); |
| 458 | int NumInts = NSizeStr / 4; |
| 459 | int CharC = 0; |
| 460 | while (NumInts) { |
| 461 | int ANum = *(int *)(MyNewStr + CharC); |
| 462 | CharC += 4; |
| 463 | NumInts--; |
| 464 | Value *ANumV = ConstantInt::get(Int32Ty, ANum, false); |
| 465 | WhatToStore.push_back(ANumV); |
| 466 | } |
| 467 | delete[] MyNewStr; |
| 468 | } else { |
| 469 | // Empty string, give a hint to RT it is no NULL |
| 470 | Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false); |
| 471 | WhatToStore.push_back(ANumV); |
| 472 | } |
| 473 | } else { |
| 474 | uint64_t Size = TD->getTypeAllocSizeInBits(ArgType); |
| 475 | assert((Size == 32 || Size == 64) && "unsupported size"); |
| 476 | Type *DstType = (Size == 32) ? Int32Ty : Int64Ty; |
| 477 | Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch); |
| 478 | WhatToStore.push_back(Arg); |
| 479 | } |
| 480 | } else if (ArgType->getTypeID() == Type::VectorTyID) { |
| 481 | Type *IType = NULL; |
| 482 | uint32_t EleCount = cast<VectorType>(ArgType)->getNumElements(); |
| 483 | uint32_t EleSize = ArgType->getScalarSizeInBits(); |
| 484 | uint32_t TotalSize = EleCount * EleSize; |
| 485 | if (EleCount == 3) { |
| 486 | IntegerType *Int32Ty = Type::getInt32Ty(ArgType->getContext()); |
| 487 | Constant *Indices[4] = { |
| 488 | ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 1), |
| 489 | ConstantInt::get(Int32Ty, 2), ConstantInt::get(Int32Ty, 2)}; |
| 490 | Constant *Mask = ConstantVector::get(Indices); |
| 491 | ShuffleVectorInst *Shuffle = new ShuffleVectorInst(Arg, Arg, Mask); |
| 492 | Shuffle->insertBefore(Brnch); |
| 493 | Arg = Shuffle; |
| 494 | ArgType = Arg->getType(); |
| 495 | TotalSize += EleSize; |
| 496 | } |
| 497 | switch (EleSize) { |
| 498 | default: |
| 499 | EleCount = TotalSize / 64; |
| 500 | IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext())); |
| 501 | break; |
| 502 | case 8: |
| 503 | if (EleCount >= 8) { |
| 504 | EleCount = TotalSize / 64; |
| 505 | IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext())); |
| 506 | } else if (EleCount >= 3) { |
| 507 | EleCount = 1; |
| 508 | IType = dyn_cast<Type>(Type::getInt32Ty(ArgType->getContext())); |
| 509 | } else { |
| 510 | EleCount = 1; |
| 511 | IType = dyn_cast<Type>(Type::getInt16Ty(ArgType->getContext())); |
| 512 | } |
| 513 | break; |
| 514 | case 16: |
| 515 | if (EleCount >= 3) { |
| 516 | EleCount = TotalSize / 64; |
| 517 | IType = dyn_cast<Type>(Type::getInt64Ty(ArgType->getContext())); |
| 518 | } else { |
| 519 | EleCount = 1; |
| 520 | IType = dyn_cast<Type>(Type::getInt32Ty(ArgType->getContext())); |
| 521 | } |
| 522 | break; |
| 523 | } |
| 524 | if (EleCount > 1) { |
| 525 | IType = dyn_cast<Type>(VectorType::get(IType, EleCount)); |
| 526 | } |
| 527 | Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch); |
| 528 | WhatToStore.push_back(Arg); |
| 529 | } else { |
| 530 | WhatToStore.push_back(Arg); |
| 531 | } |
Stanislav Mekhanoshin | 438315b | 2019-08-13 01:07:27 +0000 | [diff] [blame] | 532 | for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) { |
| 533 | Value *TheBtCast = WhatToStore[I]; |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 534 | unsigned ArgSize = |
| 535 | TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8; |
| 536 | SmallVector<Value *, 1> BuffOffset; |
| 537 | BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize)); |
| 538 | |
| 539 | Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1); |
| 540 | Value *CastedGEP = |
| 541 | new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch); |
| 542 | StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch); |
| 543 | LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n" |
| 544 | << *StBuff << '\n'); |
| 545 | (void)StBuff; |
Stanislav Mekhanoshin | 438315b | 2019-08-13 01:07:27 +0000 | [diff] [blame] | 546 | if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands()) |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 547 | break; |
| 548 | BufferIdx = dyn_cast<GetElementPtrInst>(GetElementPtrInst::Create( |
| 549 | nullptr, BufferIdx, BuffOffset, "PrintBuffNextPtr", Brnch)); |
| 550 | LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n" |
| 551 | << *BufferIdx << '\n'); |
| 552 | } |
| 553 | } |
| 554 | } |
| 555 | } |
| 556 | |
| 557 | // erase the printf calls |
Jay Foad | dafda61 | 2019-10-02 08:44:15 +0000 | [diff] [blame^] | 558 | for (auto CI : Printfs) |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 559 | CI->eraseFromParent(); |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 560 | |
| 561 | Printfs.clear(); |
| 562 | return true; |
| 563 | } |
| 564 | |
| 565 | bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) { |
| 566 | Triple TT(M.getTargetTriple()); |
| 567 | if (TT.getArch() == Triple::r600) |
| 568 | return false; |
| 569 | |
Jay Foad | dafda61 | 2019-10-02 08:44:15 +0000 | [diff] [blame^] | 570 | auto PrintfFunction = M.getFunction("printf"); |
| 571 | if (!PrintfFunction) |
| 572 | return false; |
| 573 | |
| 574 | for (auto &U : PrintfFunction->uses()) { |
| 575 | if (auto *CI = dyn_cast<CallInst>(U.getUser())) { |
| 576 | if (CI->isCallee(&U)) |
| 577 | Printfs.push_back(CI); |
| 578 | } |
| 579 | } |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 580 | |
| 581 | if (Printfs.empty()) |
| 582 | return false; |
| 583 | |
| 584 | TD = &M.getDataLayout(); |
| 585 | auto DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); |
| 586 | DT = DTWP ? &DTWP->getDomTree() : nullptr; |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 587 | auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { |
| 588 | return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
| 589 | }; |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 590 | |
Teresa Johnson | 9c27b59 | 2019-09-07 03:09:36 +0000 | [diff] [blame] | 591 | return lowerPrintfForGpu(M, GetTLI); |
Stanislav Mekhanoshin | 4c9c98f | 2019-08-12 17:12:29 +0000 | [diff] [blame] | 592 | } |