Tobias Grosser | 8d7c4db | 2012-05-30 13:53:57 +0000 | [diff] [blame] | 1 | From 28e525d58b236efa5db562c2931c996e26d6996b Mon Sep 17 00:00:00 2001 |
| 2 | From: Yabin Hu <yabin.hwu@gmail.com> |
| 3 | Date: Wed, 23 May 2012 09:19:36 +0800 |
| 4 | Subject: [PATCH] Add llvm.codegen intrinsic. |
| 5 | |
| 6 | The llvm.codegen intrinsic generates code for embedded LLVM-IR |
| 7 | strings. Each call to the intrinsic is replaced by a pointer to |
| 8 | the newly generated target code. The code generation target can be |
| 9 | different to the one of the parent module. |
| 10 | --- |
| 11 | docs/LangRef.html | 36 +++ |
| 12 | include/llvm/CodeGen/Passes.h | 3 + |
| 13 | include/llvm/InitializePasses.h | 1 + |
| 14 | include/llvm/Intrinsics.td | 4 + |
| 15 | lib/CodeGen/CMakeLists.txt | 1 + |
| 16 | lib/CodeGen/CodeGen.cpp | 1 + |
| 17 | lib/CodeGen/CodeGenIntrinsic.cpp | 229 ++++++++++++++++++++ |
| 18 | lib/CodeGen/Passes.cpp | 3 + |
| 19 | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 + |
| 20 | lib/Target/LLVMBuild.txt | 2 +- |
| 21 | lib/VMCore/Verifier.cpp | 10 + |
| 22 | .../CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll | 28 +++ |
| 23 | test/CodeGen/X86/EmbeddedCG/lit.local.cfg | 5 + |
| 24 | 13 files changed, 325 insertions(+), 1 deletions(-) |
| 25 | create mode 100644 lib/CodeGen/CodeGenIntrinsic.cpp |
| 26 | create mode 100644 test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll |
| 27 | create mode 100644 test/CodeGen/X86/EmbeddedCG/lit.local.cfg |
| 28 | |
| 29 | diff --git a/docs/LangRef.html b/docs/LangRef.html |
| 30 | index 8f7a17c..23d73bd 100644 |
| 31 | --- a/docs/LangRef.html |
| 32 | +++ b/docs/LangRef.html |
| 33 | @@ -242,6 +242,7 @@ |
| 34 | <li><a href="#int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a></li> |
| 35 | <li><a href="#int_pcmarker">'<tt>llvm.pcmarker</tt>' Intrinsic</a></li> |
| 36 | <li><a href="#int_readcyclecounter">'<tt>llvm.readcyclecounter</tt>' Intrinsic</a></li> |
| 37 | + <li><a href="#int_codegen_intrinsic">'<tt>llvm.codegen</tt>' Intrinsic</a></li> |
| 38 | </ol> |
| 39 | </li> |
| 40 | <li><a href="#int_libc">Standard C Library Intrinsics</a> |
| 41 | @@ -7015,6 +7016,41 @@ LLVM</a>.</p> |
| 42 | |
| 43 | </div> |
| 44 | |
| 45 | +<!-- _______________________________________________________________________ --> |
| 46 | +<h4> |
| 47 | + <a name="int_codegen_intrinsic">'<tt>llvm.codegen</tt>' Intrinsic</a> |
| 48 | +</h4> |
| 49 | + |
| 50 | +<div> |
| 51 | + |
| 52 | +<h5>Syntax:</h5> |
| 53 | +<pre> |
| 54 | + declare i8* @llvm.codegen(i8* &lt;IRString&gt;, i8* &lt;MCPU&gt;, |
| 55 | + i8* &lt;Features&gt;) |
| 56 | +</pre> |
| 57 | + |
| 58 | +<h5>Overview:</h5> |
| 59 | +<p>The '<tt>llvm.codegen</tt>' intrinsic uses the LLVM back ends to generate |
| 60 | + code for embedded LLVM-IR strings. The code generation target can be |
| 61 | + different to the one of the parent module.</p> |
| 62 | + |
| 63 | +<h5>Arguments:</h5> |
| 64 | +<p><tt>IRString</tt> is a string containing LLVM-IR.</p> |
| 65 | +<p><tt>MCPU</tt> is the name of the target CPU.</p> |
| 66 | +<p><tt>Features</tt> is the string representation of the additional target |
| 67 | + features.</p> |
| 68 | + |
| 69 | +<h5>Semantics:</h5> |
| 70 | +<p>The '<tt>llvm.codegen</tt>' intrinsic transforms a string containing LLVM IR |
| 71 | + to target assembly code. Calls to the intrinsic are replaced by a pointer to |
| 72 | + the newly generated target code. In case LLVM cannot generate code (e.g. the |
| 73 | + target is not available), the call to the intrinsic is replaced by an i8 NULL |
| 74 | + pointer. Users of this intrinsic should make sure the target triple is |
| 75 | + properly set in the &lt;IRString&gt;. Inputs to both &lt;MCPU&gt; and |
| 76 | + &lt;Features&gt; parameters can be null pointers.</p> |
| 77 | + |
| 78 | +</div> |
| 79 | + |
| 80 | </div> |
| 81 | |
| 82 | <!-- ======================================================================= --> |
| 83 | diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h |
| 84 | index e76fe99..ecf3e4b 100644 |
| 85 | --- a/include/llvm/CodeGen/Passes.h |
| 86 | +++ b/include/llvm/CodeGen/Passes.h |
| 87 | @@ -373,6 +373,9 @@ namespace llvm { |
| 88 | /// branch folding). |
| 89 | extern char &GCMachineCodeAnalysisID; |
| 90 | |
| 91 | + /// CodeGenIntrinsic Pass - Create target code for embedded LLVM-IR strings. |
| 92 | + FunctionPass *createCodeGenIntrinsicPass(); |
| 93 | + |
| 94 | /// Deleter Pass - Releases GC metadata. |
| 95 | /// |
| 96 | FunctionPass *createGCInfoDeleter(); |
| 97 | diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h |
| 98 | index df696b1..28b146f 100644 |
| 99 | --- a/include/llvm/InitializePasses.h |
| 100 | +++ b/include/llvm/InitializePasses.h |
| 101 | @@ -91,6 +91,7 @@ void initializeCorrelatedValuePropagationPass(PassRegistry&); |
| 102 | void initializeDAEPass(PassRegistry&); |
| 103 | void initializeDAHPass(PassRegistry&); |
| 104 | void initializeDCEPass(PassRegistry&); |
| 105 | +void initializeCodeGenIntrinsicPass(PassRegistry&); |
| 106 | void initializeDSEPass(PassRegistry&); |
| 107 | void initializeDeadInstEliminationPass(PassRegistry&); |
| 108 | void initializeDeadMachineInstructionElimPass(PassRegistry&); |
| 109 | diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td |
| 110 | index 75162bf..4bcec16 100644 |
| 111 | --- a/include/llvm/Intrinsics.td |
| 112 | +++ b/include/llvm/Intrinsics.td |
| 113 | @@ -226,6 +226,10 @@ def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>; |
| 114 | // guard to the correct place on the stack frame. |
| 115 | def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>; |
| 116 | |
| 117 | +//===----------------- Code Generation for Embedded LLVM-IR ---------------===// |
| 118 | +def int_codegen : Intrinsic<[llvm_ptr_ty], |
| 119 | + [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty]>; |
| 120 | + |
| 121 | //===------------------- Standard C Library Intrinsics --------------------===// |
| 122 | // |
| 123 | |
| 124 | diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt |
| 125 | index 855fa0c..4451922 100644 |
| 126 | --- a/lib/CodeGen/CMakeLists.txt |
| 127 | +++ b/lib/CodeGen/CMakeLists.txt |
| 128 | @@ -6,6 +6,7 @@ add_llvm_library(LLVMCodeGen |
| 129 | CalcSpillWeights.cpp |
| 130 | CallingConvLower.cpp |
| 131 | CodeGen.cpp |
| 132 | + CodeGenIntrinsic.cpp |
| 133 | CodePlacementOpt.cpp |
| 134 | CriticalAntiDepBreaker.cpp |
| 135 | DeadMachineInstructionElim.cpp |
| 136 | diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp |
| 137 | index a81bb5c..662345a 100644 |
| 138 | --- a/lib/CodeGen/CodeGen.cpp |
| 139 | +++ b/lib/CodeGen/CodeGen.cpp |
| 140 | @@ -21,6 +21,7 @@ using namespace llvm; |
| 141 | void llvm::initializeCodeGen(PassRegistry &Registry) { |
| 142 | initializeBranchFolderPassPass(Registry); |
| 143 | initializeCalculateSpillWeightsPass(Registry); |
| 144 | + initializeCodeGenIntrinsicPass(Registry); |
| 145 | initializeCodePlacementOptPass(Registry); |
| 146 | initializeDeadMachineInstructionElimPass(Registry); |
| 147 | initializeExpandPostRAPass(Registry); |
| 148 | diff --git a/lib/CodeGen/CodeGenIntrinsic.cpp b/lib/CodeGen/CodeGenIntrinsic.cpp |
| 149 | new file mode 100644 |
| 150 | index 0000000..01253cd |
| 151 | --- /dev/null |
| 152 | +++ b/lib/CodeGen/CodeGenIntrinsic.cpp |
| 153 | @@ -0,0 +1,229 @@ |
| 154 | +//===-- CodeGenIntrinsic.cpp - CodeGen Intrinsic --------------------------===// |
| 155 | +// |
| 156 | +// The LLVM Compiler Infrastructure |
| 157 | +// |
| 158 | +// This file is distributed under the University of Illinois Open Source |
| 159 | +// License. See LICENSE.TXT for details. |
| 160 | +// |
| 161 | +//===----------------------------------------------------------------------===// |
| 162 | +// |
| 163 | +// This file implements the llvm.codegen intrinsic. |
| 164 | +// |
| 165 | +//===----------------------------------------------------------------------===// |
| 166 | + |
| 167 | +#include "llvm/CodeGen/Passes.h" |
| 168 | +#include "llvm/CallingConv.h" |
| 169 | +#include "llvm/IntrinsicInst.h" |
| 170 | +#include "llvm/LLVMContext.h" |
| 171 | +#include "llvm/Module.h" |
| 172 | +#include "llvm/PassManager.h" |
| 173 | +#include "llvm/Assembly/Parser.h" |
| 174 | +#include "llvm/Target/TargetData.h" |
| 175 | +#include "llvm/Target/TargetMachine.h" |
| 176 | +#include "llvm/Target/TargetRegisterInfo.h" |
| 177 | +#include "llvm/Support/Debug.h" |
| 178 | +#include "llvm/Support/ErrorHandling.h" |
| 179 | +#include "llvm/Support/FormattedStream.h" |
| 180 | +#include "llvm/Support/Host.h" |
| 181 | +#include "llvm/Support/IRBuilder.h" |
| 182 | +#include "llvm/Support/raw_ostream.h" |
| 183 | +#include "llvm/Support/SourceMgr.h" |
| 184 | +#include "llvm/Support/TargetRegistry.h" |
| 185 | +#include "llvm/ADT/Triple.h" |
| 186 | + |
| 187 | +using namespace llvm; |
| 188 | + |
| 189 | +namespace { |
| 190 | + /// ASMGenerator - Generates target-specific assembly code from LLVM IR. |
| 191 | + class ASMGenerator { |
| 192 | + public: |
| 193 | + ASMGenerator() {} |
| 194 | + /// generate - Compiles the LLVM IR string referenced by IRStr and stores the |
| 195 | + /// assembly in ASM; MCPUStr and FeaturesStr may be null. Returns true on success. |
| 196 | + bool generate(Value *IRStr, Value *MCPUStr, Value *FeaturesStr, |
| 197 | + std::string &ASM); |
| 198 | + private: |
| 199 | + /// Copies the string initializer referenced by ConstData into Out, if any. |
| 200 | + bool getStringFromConstantExpr(Value *ConstData, std::string &Out) const; |
| 201 | + }; |
| 202 | |
| 203 | + /// CodeGenIntrinsic - This pass replaces each call to the llvm.codegen intrinsic |
| 204 | + /// with a pointer to the string generated by ASMGenerator, or a null pointer. |
| 205 | + class CodeGenIntrinsic : public FunctionPass { |
| 206 | + public: |
| 207 | + static char ID; |
| 208 | |
| 209 | + CodeGenIntrinsic(); |
| 210 | + const char *getPassName() const; |
| 211 | + virtual bool runOnFunction(Function &F); |
| 212 | + }; |
| 213 | +} |
| 214 | + |
| 215 | +// ----------------------------------------------------------------------------- |
| 216 | +static bool getTargetMachineFromModule(Module *M, const StringRef &TripleStr, |
| 217 | +                                       const StringRef &MCPU, |
| 218 | +                                       const StringRef &Features, |
| 219 | +                                       TargetMachine *&TM) { |
| 220 | +  std::string ErrMsg; |
| 221 | +  const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg); |
| 222 | +  if (!TheTarget) { |
| 223 | +    errs() << ErrMsg << "\n"; |
| 224 | +    return false; |
| 225 | +  } |
| 226 | |
| 227 | +  TargetOptions Options; |
| 228 | +  TM = TheTarget->createTargetMachine(TripleStr, MCPU, Features, Options); |
| 229 | +  // An assert alone vanishes in release builds; report a null machine instead. |
| 230 | +  return TM != 0; |
| 231 | +} |
| 232 | + |
| 233 | +static bool createASMAsString(Module *New, const StringRef &Triple, |
| 234 | +                              const StringRef &MCPU, const StringRef &Features, |
| 235 | +                              std::string &ASM) { |
| 236 | +  TargetMachine *Target = 0; |
| 237 | +  if (!getTargetMachineFromModule(New, Triple, MCPU, Features, Target) || |
| 238 | +      !Target) |
| 239 | +    return false; |
| 240 | +  // Own the machine from here on so that every exit path releases it; it is |
| 241 | +  // declared before PM and is therefore destroyed after the pass manager. |
| 242 | +  std::auto_ptr<TargetMachine> TargetOwner(Target); |
| 243 | |
| 244 | +  // Build up all of the passes that we want to do to the module. |
| 245 | +  PassManager PM; |
| 246 | |
| 247 | +  // Add the target data from the target machine, if it exists, or the module. |
| 248 | +  if (const TargetData *TD = Target->getTargetData()) |
| 249 | +    PM.add(new TargetData(*TD)); |
| 250 | +  else |
| 251 | +    PM.add(new TargetData(New)); |
| 252 | |
| 253 | +  { |
| 254 | +    raw_string_ostream NameROS(ASM); |
| 255 | +    formatted_raw_ostream FOS(NameROS); |
| 256 | |
| 257 | +    // Ask the target to add backend passes as necessary. |
| 258 | +    const bool UseVerifier = true; |
| 259 | +    if (Target->addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_AssemblyFile, |
| 260 | +                                    UseVerifier)) { |
| 261 | +      errs() << "CodeGen Intrinsic: target does not support generation of this " |
| 262 | +             << "file type!\n"; |
| 263 | +      return false; |
| 264 | +    } |
| 265 | |
| 266 | +    PM.run(*New); |
| 267 | +    FOS.flush(); |
| 268 | +  } |
| 269 | +  return true; |
| 270 | +} |
| 271 | + |
| 272 | +bool ASMGenerator::getStringFromConstantExpr(Value *ConstData, |
| 273 | +                                             std::string &Out) const { |
| 274 | +  // Only a constant expression whose first operand is a global variable with |
| 275 | +  // a constant string initializer is accepted; otherwise Out is left as is. |
| 276 | +  ConstantExpr *U = dyn_cast<ConstantExpr>(ConstData); |
| 277 | +  GlobalVariable *GV = U ? dyn_cast<GlobalVariable>(U->getOperand(0)) : 0; |
| 278 | +  if (!GV || !GV->hasInitializer()) // A mere declaration has no initializer. |
| 279 | +    return false; |
| 280 | +  ConstantDataArray *CA = dyn_cast<ConstantDataArray>(GV->getInitializer()); |
| 281 | +  if (!CA || !CA->isString()) // getAsString() is only valid for i8 arrays. |
| 282 | +    return false; |
| 283 | |
| 284 | +  Out = CA->getAsString(); |
| 285 | +  return true; |
| 286 | +} |
| 287 | + |
| 288 | +bool ASMGenerator::generate(Value *IRStr, Value *MCPUStr, Value *FeaturesStr, |
| 289 | +                            std::string &ASM) { |
| 290 | +  // The IR string is mandatory. The CPU and feature strings are optional and |
| 291 | +  // simply stay empty when they cannot be read (e.g. null pointer arguments). |
| 292 | +  std::string Kernel, MCPU, Features; |
| 293 | +  if (!getStringFromConstantExpr(IRStr, Kernel)) |
| 294 | +    return false; |
| 295 | +  getStringFromConstantExpr(MCPUStr, MCPU); |
| 296 | +  getStringFromConstantExpr(FeaturesStr, Features); |
| 297 | |
| 298 | +  SMDiagnostic ErrorMessage; |
| 299 | +  LLVMContext Context; |
| 300 | +  std::auto_ptr<Module> TempModule( |
| 301 | +    ParseAssemblyString(Kernel.c_str(), 0, ErrorMessage, Context)); |
| 302 | +  // Parsing malformed IR yields no module; bail out instead of dereferencing |
| 303 | +  // a null pointer below. |
| 304 | +  if (!TempModule.get()) { |
| 305 | +    errs() << "error: Could not parse the embedded LLVM-IR string.\n"; |
| 306 | +    return false; |
| 307 | +  } |
| 308 | |
| 309 | +  std::string TripleStr = Triple(TempModule->getTargetTriple()).getTriple(); |
| 310 | +  if (TripleStr.empty()) { |
| 311 | +    errs() << "error: Target triple isn't set correctly for the new module.\n"; |
| 312 | +    return false; |
| 313 | +  } |
| 314 | +  return createASMAsString(TempModule.get(), TripleStr.c_str(), MCPU.c_str(), |
| 315 | +                           Features.c_str(), ASM); |
| 316 | +} |
| 317 | + |
| 318 | +// ----------------------------------------------------------------------------- |
| 319 | +INITIALIZE_PASS(CodeGenIntrinsic, "codegen-intrinsic", "CodeGen Intrinsic", |
| 320 | + false, false) |
| 321 | +// createCodeGenIntrinsicPass - Returns a newly allocated CodeGenIntrinsic pass. |
| 322 | +FunctionPass *llvm::createCodeGenIntrinsicPass() { |
| 323 | + return new CodeGenIntrinsic(); |
| 324 | +} |
| 325 | |
| 326 | +char CodeGenIntrinsic::ID = 0; |
| 327 | +// The constructor hands ID to the FunctionPass base class. |
| 328 | +CodeGenIntrinsic::CodeGenIntrinsic() |
| 329 | + : FunctionPass(ID) { |
| 330 | +} |
| 331 | +// getPassName - Returns the fixed, human-readable name of this pass. |
| 332 | +const char *CodeGenIntrinsic::getPassName() const { |
| 333 | + return "Lowering CodeGen Intrinsic."; |
| 334 | +} |
| 335 | + |
| 336 | +/// runOnFunction - Lowers every llvm.codegen call in F; returns true on change. |
| 337 | +bool CodeGenIntrinsic::runOnFunction(Function &F) { |
| 338 | +  bool MadeChange = false; |
| 339 | +  Module *M = F.getParent(); |
| 340 | +  if (Function *CG = M->getFunction("llvm.codegen")) { |
| 341 | +    // Step the iterator before handling the call: erasing CI below destroys |
| 342 | +    // the use that the iterator would otherwise still refer to. |
| 343 | +    for (Function::use_iterator I = CG->use_begin(), E = CG->use_end(); |
| 344 | +         I != E;) { |
| 345 | +      CallInst *CI = dyn_cast<CallInst>(*I++); |
| 346 | +      if (!CI || &F != CI->getParent()->getParent()) |
| 347 | +        continue; |
| 348 | +      std::string ASM; |
| 349 | +      ASMGenerator Generator; |
| 350 | +      IRBuilder<> Builder(CI->getParent(), CI); |
| 351 | +      Value *St; |
| 352 | +      if (!Generator.generate(CI->getArgOperand(0), CI->getArgOperand(1), |
| 353 | +                              CI->getArgOperand(2), ASM)) { |
| 354 | +        St = Constant::getNullValue(CG->getReturnType()); |
| 355 | +      } else { |
| 356 | +        // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? |
| 357 | +        // Seems that, short of multithreaded LLVM, it should be safe; |
| 358 | +        // all that is necessary is that a simple Module::iterator loop |
| 359 | +        // not be invalidated. Appending to the GlobalVariable list is |
| 360 | +        // safe in that sense. |
| 361 | +        // |
| 362 | +        // All the output passes emit globals last. The ExecutionEngine |
| 363 | +        // explicitly supports adding globals to the module after |
| 364 | +        // initialization. |
| 365 | +        // |
| 366 | +        // Still, if it isn't deemed acceptable, then this |
| 367 | +        // transformation needs to be a ModulePass (which means it |
| 368 | +        // cannot be in the 'llc' pipeline (which uses a |
| 369 | +        // FunctionPassManager (which segfaults (not asserts) if |
| 370 | +        // provided a ModulePass))). |
| 371 | +        St = Builder.CreateGlobalStringPtr(ASM, "ASM"); |
| 372 | +      } |
| 373 | +      CI->replaceAllUsesWith(St); |
| 374 | +      CI->eraseFromParent(); |
| 375 | +      // We should erase the unused globals from current module. But we |
| 376 | +      // can't do this within a FunctionPass. |
| 377 | +      MadeChange = true; |
| 378 | +    } |
| 379 | +  } |
| 380 | |
| 381 | +  return MadeChange; |
| 382 | +} |
| 383 | diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp |
| 384 | index 490547b..95b6e0c 100644 |
| 385 | --- a/lib/CodeGen/Passes.cpp |
| 386 | +++ b/lib/CodeGen/Passes.cpp |
| 387 | @@ -305,6 +305,9 @@ void TargetPassConfig::addIRPasses() { |
| 388 | |
| 389 | PM->add(createGCLoweringPass()); |
| 390 | |
| 391 | + // Generate target code for embedded LLVM-IR strings. |
| 392 | + PM->add(createCodeGenIntrinsicPass()); |
| 393 | + |
| 394 | // Make sure that no unreachable blocks are instruction selected. |
| 395 | PM->add(createUnreachableBlockEliminationPass()); |
| 396 | } |
| 397 | diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp |
| 398 | index f1b4d80..d87986c 100644 |
| 399 | --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp |
| 400 | +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp |
| 401 | @@ -5131,6 +5131,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { |
| 402 | case Intrinsic::lifetime_end: |
| 403 | // Discard region information. |
| 404 | return 0; |
| 405 | + |
| 406 | + case Intrinsic::codegen: |
| 407 | + llvm_unreachable("failed to lower codegen intrinsic!"); |
| 408 | } |
| 409 | } |
| 410 | |
| 411 | diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt |
| 412 | index 045ab9e..d9bd19f 100644 |
| 413 | --- a/lib/Target/LLVMBuild.txt |
| 414 | +++ b/lib/Target/LLVMBuild.txt |
| 415 | @@ -45,7 +45,7 @@ parent = Libraries |
| 416 | type = Library |
| 417 | name = Target |
| 418 | parent = Libraries |
| 419 | -required_libraries = Core MC Support |
| 420 | +required_libraries = Core MC Support AsmParser |
| 421 | |
| 422 | ; This is a special group whose required libraries are extended (by llvm-build) |
| 423 | ; with every built target, which makes it easy for tools to include every |
| 424 | diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp |
| 425 | index f11efff..1031685 100644 |
| 426 | --- a/lib/VMCore/Verifier.cpp |
| 427 | +++ b/lib/VMCore/Verifier.cpp |
| 428 | @@ -1783,6 +1783,16 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { |
| 429 | Assert1(isa<ConstantInt>(CI.getArgOperand(1)), |
| 430 | "llvm.invariant.end parameter #2 must be a constant integer", &CI); |
| 431 | break; |
| 432 | + case Intrinsic::codegen: |
| 433 | + Assert1(isa<ConstantExpr>(CI.getArgOperand(0)), |
| 434 | + "llvm.codegen parameter #1 must be a constant expression", &CI); |
| 435 | + Assert1(isa<ConstantExpr>(CI.getArgOperand(1)) || |
| 436 | + isa<ConstantPointerNull>(CI.getArgOperand(1)), |
| 437 | + "llvm.codegen parameter #2 must be a constant expression", &CI); |
| 438 | + Assert1(isa<ConstantExpr>(CI.getArgOperand(2)) || |
| 439 | + isa<ConstantPointerNull>(CI.getArgOperand(2)), |
| 440 | + "llvm.codegen parameter #3 must be a constant expression", &CI); |
| 441 | + break; |
| 442 | } |
| 443 | } |
| 444 | |
| 445 | diff --git a/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll b/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll |
| 446 | new file mode 100644 |
| 447 | index 0000000..768790d |
| 448 | --- /dev/null |
| 449 | +++ b/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll |
| 450 | @@ -0,0 +1,28 @@ |
| 451 | +; RUN: llc < %s -march=x86 | FileCheck %s |
| 452 | + |
| 453 | +; ModuleID = 'embedded-codegen-ptx.ll' |
| 454 | +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" |
| 455 | +target triple = "i386-pc-linux-gnu" |
| 456 | + |
| 457 | +@llvm_kernel = private unnamed_addr constant [1937 x i8] c"target triple = \22ptx32-pc-linux-gnu\22\0A\0Adefine internal ptx_kernel void @gpu_codegen.ptx_subfn(i8* %ptx.Array) {\0Aptx.setup:\0A %0 = bitcast i8* %ptx.Array to [128 x [128 x i32]]*\0A %1 = call i32 @llvm.ptx.read.nctaid.x()\0A %2 = zext i32 %1 to i64\0A %3 = call i32 @llvm.ptx.read.nctaid.y()\0A %4 = zext i32 %3 to i64\0A %5 = call i32 @llvm.ptx.read.ntid.x()\0A %6 = zext i32 %5 to i64\0A %7 = call i32 @llvm.ptx.read.ntid.y()\0A %8 = zext i32 %7 to i64\0A %9 = call i32 @llvm.ptx.read.ctaid.x()\0A %10 = zext i32 %9 to i64\0A %11 = call i32 @llvm.ptx.read.ctaid.y()\0A %12 = zext i32 %11 to i64\0A %13 = call i32 @llvm.ptx.read.tid.x()\0A %14 = zext i32 %13 to i64\0A %15 = call i32 @llvm.ptx.read.tid.y()\0A %16 = zext i32 %15 to i64\0A br label %ptx.loop_body\0A\0Aptx.exit: ; preds = %polly.stmt.for.body3\0A ret void\0A\0Aptx.loop_body: ; preds = %ptx.setup\0A %p_gpu_index_i = mul i64 %12, %2\0A %17 = add i64 %p_gpu_index_i, %10\0A %p_gpu_index_j = mul i64 %16, %6\0A %18 = add i64 %p_gpu_index_j, %14\0A br label %polly.stmt.for.body3\0A\0Apolly.stmt.for.body3: ; preds = %ptx.loop_body\0A %19 = trunc i64 %17 to i32\0A %p_mul = shl nsw i32 %19, 7\0A %20 = trunc i64 %18 to i32\0A %p_add = add nsw i32 %p_mul, %20\0A %21 = trunc i64 %17 to i32\0A %22 = trunc i64 %18 to i32\0A %p_arrayidx4 = getelementptr inbounds [128 x [128 x i32]]* %0, i32 0, i32 %21, i32 %22\0A store i32 %p_add, i32* %p_arrayidx4\0A br label %ptx.exit\0A}\0A\0Adeclare i32 @llvm.ptx.read.nctaid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.nctaid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ctaid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ctaid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ntid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ntid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.tid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.tid.y() nounwind readnone\0A\00" |
| 458 | + |
| 459 | +@.str = private unnamed_addr constant [3 x i8] c"%s\00", align 1 |
| 460 | + |
| 461 | +define i32 @gpu_codegen() nounwind { |
| 462 | +entry: |
| 463 | + %0 = call i8* @llvm.codegen(i8* getelementptr inbounds ([1937 x i8]* @llvm_kernel, i32 0, i32 0), i8* null, i8* null) |
| 464 | + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8* %0) |
| 465 | + ret i32 0 |
| 466 | +} |
| 467 | + |
| 468 | +define i32 @main() nounwind { |
| 469 | +entry: |
| 470 | + %call = call i32 @gpu_codegen() |
| 471 | + ret i32 0 |
| 472 | +} |
| 473 | + |
| 474 | +declare i8* @llvm.codegen(i8*, i8*, i8*) nounwind |
| 475 | + |
| 476 | +declare i32 @printf(i8*, ...) nounwind |
| 477 | + |
| 478 | +; CHECK: .entry gpu_codegen_2E_ptx_subfn (.param .b32 __param_1) |
| 479 | diff --git a/test/CodeGen/X86/EmbeddedCG/lit.local.cfg b/test/CodeGen/X86/EmbeddedCG/lit.local.cfg |
| 480 | new file mode 100644 |
| 481 | index 0000000..346ffa1 |
| 482 | --- /dev/null |
| 483 | +++ b/test/CodeGen/X86/EmbeddedCG/lit.local.cfg |
| 484 | @@ -0,0 +1,5 @@ |
| 485 | +config.suffixes = ['.ll', '.c', '.cpp'] |
| 486 | + |
| 487 | +targets = set(config.root.targets_to_build.split()) |
| 488 | +if not 'PTX' in targets: |
| 489 | + config.unsupported = True |
| 490 | -- |
| 491 | 1.7.6.5 |
| 492 | |