Chris Lattner | 2eacf26 | 2004-01-05 05:25:10 +0000 | [diff] [blame] | 1 | //===-- ProgramInfo.cpp - Compute and cache info about a program ----------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file was developed by the LLVM research group and is distributed under |
| 6 | // the University of Illinois Open Source License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file implements the ProgramInfo and related classes, by sorting through |
| 11 | // the loaded Module. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "llvm/Debugger/ProgramInfo.h" |
| 16 | #include "llvm/Constants.h" |
| 17 | #include "llvm/DerivedTypes.h" |
| 18 | #include "llvm/Intrinsics.h" |
| 19 | #include "llvm/iOther.h" |
| 20 | #include "llvm/Module.h" |
| 21 | #include "llvm/Debugger/SourceFile.h" |
| 22 | #include "llvm/Debugger/SourceLanguage.h" |
| 23 | #include "Support/FileUtilities.h" |
| 24 | #include "Support/SlowOperationInformer.h" |
| 25 | #include "Support/STLExtras.h" |
| 26 | using namespace llvm; |
| 27 | |
| 28 | /// getGlobalVariablesUsing - Return all of the global variables which have the |
| 29 | /// specified value in their initializer somewhere. |
| 30 | static void getGlobalVariablesUsing(Value *V, |
| 31 | std::vector<GlobalVariable*> &Found) { |
| 32 | for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { |
| 33 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*I)) |
| 34 | Found.push_back(GV); |
| 35 | else if (Constant *C = dyn_cast<Constant>(*I)) |
| 36 | getGlobalVariablesUsing(C, Found); |
| 37 | } |
| 38 | } |
| 39 | |
| 40 | /// getStringValue - Turn an LLVM constant pointer that eventually points to a |
| 41 | /// global into a string value. Return an empty string if we can't do it. |
| 42 | /// |
| 43 | static std::string getStringValue(Value *V, unsigned Offset = 0) { |
| 44 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { |
| 45 | if (GV->hasInitializer() && isa<ConstantArray>(GV->getInitializer())) { |
| 46 | ConstantArray *Init = cast<ConstantArray>(GV->getInitializer()); |
Chris Lattner | e3f84f5 | 2004-01-14 17:07:46 +0000 | [diff] [blame^] | 47 | if (Init->isString()) { |
Chris Lattner | 2eacf26 | 2004-01-05 05:25:10 +0000 | [diff] [blame] | 48 | std::string Result = Init->getAsString(); |
| 49 | if (Offset < Result.size()) { |
| 50 | // If we are pointing INTO The string, erase the beginning... |
| 51 | Result.erase(Result.begin(), Result.begin()+Offset); |
| 52 | |
| 53 | // Take off the null terminator, and any string fragments after it. |
| 54 | std::string::size_type NullPos = Result.find_first_of((char)0); |
| 55 | if (NullPos != std::string::npos) |
| 56 | Result.erase(Result.begin()+NullPos, Result.end()); |
| 57 | return Result; |
| 58 | } |
| 59 | } |
| 60 | } |
| 61 | } else if (Constant *C = dyn_cast<Constant>(V)) { |
| 62 | if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(C)) |
| 63 | return getStringValue(CPR->getValue(), Offset); |
| 64 | else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { |
| 65 | if (CE->getOpcode() == Instruction::GetElementPtr) { |
| 66 | // Turn a gep into the specified offset. |
| 67 | if (CE->getNumOperands() == 3 && |
| 68 | cast<Constant>(CE->getOperand(1))->isNullValue() && |
| 69 | isa<ConstantInt>(CE->getOperand(2))) { |
| 70 | return getStringValue(CE->getOperand(0), |
| 71 | Offset+cast<ConstantInt>(CE->getOperand(2))->getRawValue()); |
| 72 | } |
| 73 | } |
| 74 | } |
| 75 | } |
| 76 | return ""; |
| 77 | } |
| 78 | |
| 79 | /// getNextStopPoint - Follow the def-use chains of the specified LLVM value, |
| 80 | /// traversing the use chains until we get to a stoppoint. When we do, return |
| 81 | /// the source location of the stoppoint. If we don't find a stoppoint, return |
| 82 | /// null. |
| 83 | static const GlobalVariable *getNextStopPoint(const Value *V, unsigned &LineNo, |
| 84 | unsigned &ColNo) { |
| 85 | // The use-def chains can fork. As such, we pick the lowest numbered one we |
| 86 | // find. |
| 87 | const GlobalVariable *LastDesc = 0; |
| 88 | unsigned LastLineNo = ~0; |
| 89 | unsigned LastColNo = ~0; |
| 90 | |
| 91 | for (Value::use_const_iterator UI = V->use_begin(), E = V->use_end(); |
| 92 | UI != E; ++UI) { |
| 93 | bool ShouldRecurse = true; |
| 94 | if (cast<Instruction>(*UI)->getOpcode() == Instruction::PHI) { |
| 95 | // Infinite loops == bad, ignore PHI nodes. |
| 96 | ShouldRecurse = false; |
| 97 | } else if (const CallInst *CI = dyn_cast<CallInst>(*UI)) { |
| 98 | // If we found a stop point, check to see if it is earlier than what we |
| 99 | // already have. If so, remember it. |
| 100 | if (const Function *F = CI->getCalledFunction()) |
| 101 | if (F->getIntrinsicID() == Intrinsic::dbg_stoppoint) { |
| 102 | unsigned CurLineNo = ~0, CurColNo = ~0; |
| 103 | const GlobalVariable *CurDesc = 0; |
| 104 | if (const ConstantInt *C = dyn_cast<ConstantInt>(CI->getOperand(2))) |
| 105 | CurLineNo = C->getRawValue(); |
| 106 | if (const ConstantInt *C = dyn_cast<ConstantInt>(CI->getOperand(3))) |
| 107 | CurColNo = C->getRawValue(); |
| 108 | const Value *Op = CI->getOperand(4); |
| 109 | if (const ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(Op)) |
| 110 | Op = CPR->getValue(); |
| 111 | |
| 112 | if ((CurDesc = dyn_cast<GlobalVariable>(Op)) && |
| 113 | (LineNo < LastLineNo || |
| 114 | (LineNo == LastLineNo && ColNo < LastColNo))) { |
| 115 | LastDesc = CurDesc; |
| 116 | LastLineNo = CurLineNo; |
| 117 | LastColNo = CurColNo; |
| 118 | } |
| 119 | ShouldRecurse = false; |
| 120 | } |
| 121 | |
| 122 | } |
| 123 | |
| 124 | // If this is not a phi node or a stopping point, recursively scan the users |
| 125 | // of this instruction to skip over region.begin's and the like. |
| 126 | if (ShouldRecurse) { |
| 127 | unsigned CurLineNo, CurColNo; |
| 128 | if (const GlobalVariable *GV = getNextStopPoint(*UI, CurLineNo,CurColNo)){ |
| 129 | if (LineNo < LastLineNo || (LineNo == LastLineNo && ColNo < LastColNo)){ |
| 130 | LastDesc = GV; |
| 131 | LastLineNo = CurLineNo; |
| 132 | LastColNo = CurColNo; |
| 133 | } |
| 134 | } |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | if (LastDesc) { |
| 139 | LineNo = LastLineNo != ~0U ? LastLineNo : 0; |
| 140 | ColNo = LastColNo != ~0U ? LastColNo : 0; |
| 141 | } |
| 142 | return LastDesc; |
| 143 | } |
| 144 | |
| 145 | |
| 146 | //===----------------------------------------------------------------------===// |
| 147 | // SourceFileInfo implementation |
| 148 | // |
| 149 | |
| 150 | SourceFileInfo::SourceFileInfo(const GlobalVariable *Desc, |
| 151 | const SourceLanguage &Lang) |
| 152 | : Language(&Lang), Descriptor(Desc) { |
| 153 | Version = 0; |
| 154 | SourceText = 0; |
| 155 | |
| 156 | if (Desc && Desc->hasInitializer()) |
| 157 | if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer())) |
| 158 | if (CS->getNumOperands() > 4) { |
| 159 | if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(CS->getOperand(1))) |
| 160 | Version = CUI->getValue(); |
| 161 | |
| 162 | BaseName = getStringValue(CS->getOperand(3)); |
| 163 | Directory = getStringValue(CS->getOperand(4)); |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | SourceFileInfo::~SourceFileInfo() { |
| 168 | delete SourceText; |
| 169 | } |
| 170 | |
| 171 | SourceFile &SourceFileInfo::getSourceText() const { |
| 172 | // FIXME: this should take into account the source search directories! |
| 173 | if (SourceText == 0) // Read the file in if we haven't already. |
| 174 | if (!Directory.empty() && FileOpenable(Directory+"/"+BaseName)) |
| 175 | SourceText = new SourceFile(Directory+"/"+BaseName, Descriptor); |
| 176 | else |
| 177 | SourceText = new SourceFile(BaseName, Descriptor); |
| 178 | return *SourceText; |
| 179 | } |
| 180 | |
| 181 | |
| 182 | //===----------------------------------------------------------------------===// |
| 183 | // SourceFunctionInfo implementation |
| 184 | // |
| 185 | SourceFunctionInfo::SourceFunctionInfo(ProgramInfo &PI, |
| 186 | const GlobalVariable *Desc) |
| 187 | : Descriptor(Desc) { |
| 188 | LineNo = ColNo = 0; |
| 189 | if (Desc && Desc->hasInitializer()) |
| 190 | if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer())) |
| 191 | if (CS->getNumOperands() > 2) { |
| 192 | // Entry #1 is the file descriptor. |
| 193 | if (const ConstantPointerRef *CPR = |
| 194 | dyn_cast<ConstantPointerRef>(CS->getOperand(1))) |
| 195 | if (const GlobalVariable *GV = |
| 196 | dyn_cast<GlobalVariable>(CPR->getValue())) |
| 197 | SourceFile = &PI.getSourceFile(GV); |
| 198 | |
| 199 | // Entry #2 is the function name. |
| 200 | Name = getStringValue(CS->getOperand(2)); |
| 201 | } |
| 202 | } |
| 203 | |
| 204 | /// getSourceLocation - This method returns the location of the first stopping |
| 205 | /// point in the function. |
| 206 | void SourceFunctionInfo::getSourceLocation(unsigned &RetLineNo, |
| 207 | unsigned &RetColNo) const { |
| 208 | // If we haven't computed this yet... |
| 209 | if (!LineNo) { |
| 210 | // Look at all of the users of the function descriptor, looking for calls to |
| 211 | // %llvm.dbg.func.start. |
| 212 | for (Value::use_const_iterator UI = Descriptor->use_begin(), |
| 213 | E = Descriptor->use_end(); UI != E; ++UI) |
| 214 | if (const CallInst *CI = dyn_cast<CallInst>(*UI)) |
| 215 | if (const Function *F = CI->getCalledFunction()) |
| 216 | if (F->getIntrinsicID() == Intrinsic::dbg_func_start) { |
| 217 | // We found the start of the function. Check to see if there are |
| 218 | // any stop points on the use-list of the function start. |
| 219 | const GlobalVariable *SD = getNextStopPoint(CI, LineNo, ColNo); |
| 220 | if (SD) { // We found the first stop point! |
| 221 | // This is just a sanity check. |
| 222 | if (getSourceFile().getDescriptor() != SD) |
| 223 | std::cout << "WARNING: first line of function is not in the" |
| 224 | " file that the function descriptor claims it is in.\n"; |
| 225 | break; |
| 226 | } |
| 227 | } |
| 228 | } |
| 229 | RetLineNo = LineNo; RetColNo = ColNo; |
| 230 | } |
| 231 | |
| 232 | //===----------------------------------------------------------------------===// |
| 233 | // ProgramInfo implementation |
| 234 | // |
| 235 | |
| 236 | ProgramInfo::ProgramInfo(Module *m) : M(m) { |
| 237 | assert(M && "Cannot create program information with a null module!"); |
| 238 | ProgramTimeStamp = getFileTimestamp(M->getModuleIdentifier()); |
| 239 | |
| 240 | SourceFilesIsComplete = false; |
| 241 | SourceFunctionsIsComplete = false; |
| 242 | } |
| 243 | |
| 244 | ProgramInfo::~ProgramInfo() { |
| 245 | // Delete cached information about source program objects... |
| 246 | for (std::map<const GlobalVariable*, SourceFileInfo*>::iterator |
| 247 | I = SourceFiles.begin(), E = SourceFiles.end(); I != E; ++I) |
| 248 | delete I->second; |
| 249 | for (std::map<const GlobalVariable*, SourceFunctionInfo*>::iterator |
| 250 | I = SourceFunctions.begin(), E = SourceFunctions.end(); I != E; ++I) |
| 251 | delete I->second; |
| 252 | |
| 253 | // Delete the source language caches. |
| 254 | for (unsigned i = 0, e = LanguageCaches.size(); i != e; ++i) |
| 255 | delete LanguageCaches[i].second; |
| 256 | } |
| 257 | |
| 258 | |
| 259 | //===----------------------------------------------------------------------===// |
| 260 | // SourceFileInfo tracking... |
| 261 | // |
| 262 | |
| 263 | /// getSourceFile - Return source file information for the specified source file |
| 264 | /// descriptor object, adding it to the collection as needed. This method |
| 265 | /// always succeeds (is unambiguous), and is always efficient. |
| 266 | /// |
| 267 | const SourceFileInfo & |
| 268 | ProgramInfo::getSourceFile(const GlobalVariable *Desc) { |
| 269 | SourceFileInfo *&Result = SourceFiles[Desc]; |
| 270 | if (Result) return *Result; |
| 271 | |
| 272 | // Figure out what language this source file comes from... |
| 273 | unsigned LangID = 0; // Zero is unknown language |
| 274 | if (Desc && Desc->hasInitializer()) |
| 275 | if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer())) |
| 276 | if (CS->getNumOperands() > 2) |
| 277 | if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(CS->getOperand(2))) |
| 278 | LangID = CUI->getValue(); |
| 279 | |
| 280 | const SourceLanguage &Lang = SourceLanguage::get(LangID); |
| 281 | SourceFileInfo *New = Lang.createSourceFileInfo(Desc, *this); |
| 282 | |
| 283 | // FIXME: this should check to see if there is already a Filename/WorkingDir |
| 284 | // pair that matches this one. If so, we shouldn't create the duplicate! |
| 285 | // |
| 286 | SourceFileIndex.insert(std::make_pair(New->getBaseName(), New)); |
| 287 | return *(Result = New); |
| 288 | } |
| 289 | |
| 290 | |
| 291 | /// getSourceFiles - Index all of the source files in the program and return |
| 292 | /// a mapping of it. This information is lazily computed the first time |
| 293 | /// that it is requested. Since this information can take a long time to |
| 294 | /// compute, the user is given a chance to cancel it. If this occurs, an |
| 295 | /// exception is thrown. |
| 296 | const std::map<const GlobalVariable*, SourceFileInfo*> & |
| 297 | ProgramInfo::getSourceFiles(bool RequiresCompleteMap) { |
| 298 | // If we have a fully populated map, or if the client doesn't need one, just |
| 299 | // return what we have. |
| 300 | if (SourceFilesIsComplete || !RequiresCompleteMap) |
| 301 | return SourceFiles; |
| 302 | |
| 303 | // Ok, all of the source file descriptors (compile_unit in dwarf terms), |
| 304 | // should be on the use list of the llvm.dbg.translation_units global. |
| 305 | // |
| 306 | GlobalVariable *Units = |
| 307 | M->getGlobalVariable("llvm.dbg.translation_units", |
| 308 | StructType::get(std::vector<const Type*>())); |
| 309 | if (Units == 0) |
| 310 | throw "Program contains no debugging information!"; |
| 311 | |
| 312 | std::vector<GlobalVariable*> TranslationUnits; |
| 313 | getGlobalVariablesUsing(Units, TranslationUnits); |
| 314 | |
| 315 | SlowOperationInformer SOI("building source files index"); |
| 316 | |
| 317 | // Loop over all of the translation units found, building the SourceFiles |
| 318 | // mapping. |
| 319 | for (unsigned i = 0, e = TranslationUnits.size(); i != e; ++i) { |
| 320 | getSourceFile(TranslationUnits[i]); |
| 321 | SOI.progress(i+1, e); |
| 322 | } |
| 323 | |
| 324 | // Ok, if we got this far, then we indexed the whole program. |
| 325 | SourceFilesIsComplete = true; |
| 326 | return SourceFiles; |
| 327 | } |
| 328 | |
| 329 | /// getSourceFile - Look up the file with the specified name. If there is |
| 330 | /// more than one match for the specified filename, prompt the user to pick |
| 331 | /// one. If there is no source file that matches the specified name, throw |
| 332 | /// an exception indicating that we can't find the file. Otherwise, return |
| 333 | /// the file information for that file. |
| 334 | const SourceFileInfo &ProgramInfo::getSourceFile(const std::string &Filename) { |
| 335 | std::multimap<std::string, SourceFileInfo*>::const_iterator Start, End; |
| 336 | getSourceFiles(); |
| 337 | tie(Start, End) = SourceFileIndex.equal_range(Filename); |
| 338 | |
| 339 | if (Start == End) throw "Could not find source file '" + Filename + "'!"; |
| 340 | const SourceFileInfo &SFI = *Start->second; |
| 341 | ++Start; |
| 342 | if (Start == End) return SFI; |
| 343 | |
| 344 | throw "FIXME: Multiple source files with the same name not implemented!"; |
| 345 | } |
| 346 | |
| 347 | |
| 348 | //===----------------------------------------------------------------------===// |
| 349 | // SourceFunctionInfo tracking... |
| 350 | // |
| 351 | |
| 352 | |
| 353 | /// getFunction - Return function information for the specified function |
| 354 | /// descriptor object, adding it to the collection as needed. This method |
| 355 | /// always succeeds (is unambiguous), and is always efficient. |
| 356 | /// |
| 357 | const SourceFunctionInfo & |
| 358 | ProgramInfo::getFunction(const GlobalVariable *Desc) { |
| 359 | SourceFunctionInfo *&Result = SourceFunctions[Desc]; |
| 360 | if (Result) return *Result; |
| 361 | |
| 362 | // Figure out what language this function comes from... |
| 363 | const GlobalVariable *SourceFileDesc = 0; |
| 364 | if (Desc && Desc->hasInitializer()) |
| 365 | if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer())) |
| 366 | if (CS->getNumOperands() > 0) |
| 367 | if (const ConstantPointerRef *CPR = |
| 368 | dyn_cast<ConstantPointerRef>(CS->getOperand(1))) |
| 369 | SourceFileDesc = dyn_cast<GlobalVariable>(CPR->getValue()); |
| 370 | |
| 371 | const SourceLanguage &Lang = getSourceFile(SourceFileDesc).getLanguage(); |
| 372 | return *(Result = Lang.createSourceFunctionInfo(Desc, *this)); |
| 373 | } |
| 374 | |
| 375 | |
| 376 | // getSourceFunctions - Index all of the functions in the program and return |
| 377 | // them. This information is lazily computed the first time that it is |
| 378 | // requested. Since this information can take a long time to compute, the user |
| 379 | // is given a chance to cancel it. If this occurs, an exception is thrown. |
| 380 | const std::map<const GlobalVariable*, SourceFunctionInfo*> & |
| 381 | ProgramInfo::getSourceFunctions(bool RequiresCompleteMap) { |
| 382 | if (SourceFunctionsIsComplete || !RequiresCompleteMap) |
| 383 | return SourceFunctions; |
| 384 | |
| 385 | // Ok, all of the source function descriptors (subprogram in dwarf terms), |
| 386 | // should be on the use list of the llvm.dbg.translation_units global. |
| 387 | // |
| 388 | GlobalVariable *Units = |
| 389 | M->getGlobalVariable("llvm.dbg.globals", |
| 390 | StructType::get(std::vector<const Type*>())); |
| 391 | if (Units == 0) |
| 392 | throw "Program contains no debugging information!"; |
| 393 | |
| 394 | std::vector<GlobalVariable*> Functions; |
| 395 | getGlobalVariablesUsing(Units, Functions); |
| 396 | |
| 397 | SlowOperationInformer SOI("building functions index"); |
| 398 | |
| 399 | // Loop over all of the functions found, building the SourceFunctions mapping. |
| 400 | for (unsigned i = 0, e = Functions.size(); i != e; ++i) { |
| 401 | getFunction(Functions[i]); |
| 402 | SOI.progress(i+1, e); |
| 403 | } |
| 404 | |
| 405 | // Ok, if we got this far, then we indexed the whole program. |
| 406 | SourceFunctionsIsComplete = true; |
| 407 | return SourceFunctions; |
| 408 | } |